├── .gitignore
├── LICENSE
├── Makefile
├── ari
    ├── compbit.c
    ├── complen.c
    ├── complit.c
    ├── port.h
    ├── rangecod.c
    └── rangecod.h
├── bpe.h
├── divsufsort.c
├── divsufsort.h
├── e8.h
├── lzoma.h
├── pack.c
├── readme.MSVC
├── readme.txt
├── unpack.c
├── unpack_lzoma.S
└── x86
    ├── Makefile
    └── x86.cpp


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Object files
 2 | *.o
 3 | *.ko
 4 | *.obj
 5 | *.elf
 6 | 
 7 | # Precompiled Headers
 8 | *.gch
 9 | *.pch
10 | 
11 | # Libraries
12 | *.lib
13 | *.a
14 | *.la
15 | *.lo
16 | 
17 | # Shared objects (inc. Windows DLLs)
18 | *.dll
19 | *.so
20 | *.so.*
21 | *.dylib
22 | 
23 | # Executables
24 | *.exe
25 | *.out
26 | *.app
27 | *.i*86
28 | *.x86_64
29 | *.hex
30 | 
31 | # Debug files
32 | *.dSYM/
33 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc., <http://fsf.org/>
  5 |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 |                             Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Lesser General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |                     GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 |                             NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 |                      END OF TERMS AND CONDITIONS
281 | 
282 |             How to Apply These Terms to Your New Programs
283 | 
284 |   If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 | 
288 |   To do so, attach the following notices to the program.  It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 |     {description}
294 |     Copyright (C) {year}  {fullname}
295 | 
296 |     This program is free software; you can redistribute it and/or modify
297 |     it under the terms of the GNU General Public License as published by
298 |     the Free Software Foundation; either version 2 of the License, or
299 |     (at your option) any later version.
300 | 
301 |     This program is distributed in the hope that it will be useful,
302 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
303 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304 |     GNU General Public License for more details.
305 | 
306 |     You should have received a copy of the GNU General Public License along
307 |     with this program; if not, write to the Free Software Foundation, Inc.,
308 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 | 
310 | Also add information on how to contact you by electronic and paper mail.
311 | 
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 | 
315 |     Gnomovision version 69, Copyright (C) year name of author
316 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 |     This is free software, and you are welcome to redistribute it
318 |     under certain conditions; type `show c' for details.
319 | 
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License.  Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 | 
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary.  Here is a sample; alter the names:
328 | 
329 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 
332 |   {signature of Ty Coon}, 1 April 1989
333 |   Ty Coon, President of Vice
334 | 
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs.  If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library.  If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 | 
341 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | all:  # build the packer (-O2) and the C unpacker (-Os)
 2 | 	gcc -O2 -pipe pack.c divsufsort.c -o pack
 3 | 	gcc -Os -fomit-frame-pointer -std=c99 -pipe unpack.c -o unpack
 4 | 
 5 | asm_x86:  # same packer; 32-bit unpacker linked with unpack_lzoma.S
 6 | 	gcc -O2 -pipe pack.c divsufsort.c -o pack
 7 | 	gcc -DASM_X86 -m32 -Os -fomit-frame-pointer -std=c99 -pipe unpack.c unpack_lzoma.S -o unpack
 8 | 
 9 | test:  # round-trip pack.c through pack and unpack; the two md5sum lines must match
10 | 	./pack pack.c pack.c.lzoma && ./unpack pack.c.lzoma pack.c.test && md5sum pack.c pack.c.test
11 | .PHONY: all asm_x86 test


--------------------------------------------------------------------------------
/ari/compbit.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |   comp.c     headerfile for quasistatic probability model
 3 | 
 4 |   (c) Michael Schindler
 5 |   1997, 1998, 1999, 2000
 6 |   http://www.compressconsult.com/
 7 |   michael@compressconsult.com
 8 | 
 9 |   This program is free software; you can redistribute it and/or modify
10 |   it under the terms of the GNU General Public License as published by
11 |   the Free Software Foundation; either version 2 of the License, or
12 |   (at your option) any later version.
13 | 
14 |   This program is distributed in the hope that it will be useful,
15 |   but WITHOUT ANY WARRANTY; without even the implied warranty of
16 |   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 |   GNU General Public License for more details.  It may be that this
18 |   program violates local patents in your country, however it is
19 |   believed (NO WARRANTY!) to be patent-free here in Austria.
20 | 
21 |   You should have received a copy of the GNU General Public License
22 |   along with this program; if not, write to the Free Software
23 |   Foundation, Inc., 59 Temple Place - Suite 330, Boston,
24 |   MA 02111-1307, USA.
25 | 
26 |   comp is an example compressor trying to compress files with a simple
27 |   order 0 model. The files can be decompressed by decomp.
28 | 
29 |   Note that I do not think that an order 0 model as here is good;
30 |   For better compression see for example my freeware szip.
31 |   http://www.compressconsult.com/szip/
32 |   or ask me as consultant what compression method fits your data best.
33 | */
34 | 
35 | #include <stdio.h>
36 | #include <stdlib.h>
37 | #ifndef unix
38 | #include <io.h>
39 | #include <fcntl.h>
40 | #endif
41 | #include <string.h>
42 | #include <ctype.h>
43 | #include "port.h"
44 | #include "rangecod.h"
45 | /* Write the synopsis and the credit line to stderr, then terminate with exit status 1. */
46 | void usage()
47 | {   fputs("comp [inputfile [outputfile]]\n", stderr);
48 |     fputs("comp (c)1997.1998 Michael Schindler, michael@compressconsult\n", stderr);
49 |     exit(1);
50 | }
51 | /* Range-code the input (argv[1] or stdin) to the output (argv[2] or stdout) with one adaptive bit state. */
52 | int main( int argc, char *argv[] )
53 | {   int ch;
54 |     rangecoder rc;
55 |     int prop=32768;     /* the single adaptive state handed to every encbit call */
56 | 
57 |     /* argv[1] exists only when argc>1; testing argc>0 dereferenced NULL with no arguments */
58 |     if ((argc > 3) || ((argc>1) && (argv[1][0]=='-')))
59 |         usage();
60 | 
61 |     if ( argc<2 )
62 |         fprintf( stderr, "stdin" );
63 |     else
64 |     {   if (!freopen( argv[1], "rb", stdin ))
65 |         {   perror( argv[1] ); return 1; }
66 |         fprintf( stderr, "%s", argv[1] );
67 |     }
68 |     if ( argc<3 )
69 |         fprintf( stderr, " to stdout\n" );
70 |     else
71 |     {   if (!freopen( argv[2], "wb", stdout ))
72 |         {   perror( argv[2] ); return 1; }
73 |         fprintf( stderr, " to %s\n", argv[2] );
74 |     }
75 |     fprintf( stderr, "%s\n", coderversion);
76 | 
77 | #ifndef unix
78 |     setmode( fileno( stdin ), O_BINARY );
79 |     setmode( fileno( stdout ), O_BINARY );
80 | #endif
81 | 
82 |     start_encoding(&rc,0,0);
83 |     /* do the coding */
84 |     /* NOTE(review): the whole byte is passed as encbit's bit argument; presumably input bytes are 0 or 1 - confirm */
85 |     while ((ch=getc(stdin))!=EOF)
86 |         encbit(&rc,ch,&prop);
87 | 
88 |     done_encoding(&rc);
89 | 
90 |     return 0;
91 | }
92 | 


--------------------------------------------------------------------------------
/ari/complen.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |   comp.c     headerfile for quasistatic probability model
  3 | 
  4 |   (c) Michael Schindler
  5 |   1997, 1998, 1999, 2000
  6 |   http://www.compressconsult.com/
  7 |   michael@compressconsult.com
  8 | 
  9 |   This program is free software; you can redistribute it and/or modify
 10 |   it under the terms of the GNU General Public License as published by
 11 |   the Free Software Foundation; either version 2 of the License, or
 12 |   (at your option) any later version.
 13 | 
 14 |   This program is distributed in the hope that it will be useful,
 15 |   but WITHOUT ANY WARRANTY; without even the implied warranty of
 16 |   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 17 |   GNU General Public License for more details.  It may be that this
 18 |   program violates local patents in your country, however it is
 19 |   believed (NO WARRANTY!) to be patent-free here in Austria.
 20 | 
 21 |   You should have received a copy of the GNU General Public License
 22 |   along with this program; if not, write to the Free Software
 23 |   Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 24 |   MA 02111-1307, USA.
 25 | 
 26 |   comp is an example compressor trying to compress files with a simple
 27 |   order 0 model. The files can be decompressed by decomp.
 28 | 
 29 |   Note that I do not think that an order 0 model as here is good;
 30 |   For better compression see for example my freeware szip.
 31 |   http://www.compressconsult.com/szip/
 32 |   or ask me as consultant what compression method fits your data best.
 33 | */
 34 | 
 35 | #include <stdio.h>
 36 | #include <stdlib.h>
 37 | #ifndef unix
 38 | #include <io.h>
 39 | #include <fcntl.h>
 40 | #endif
 41 | #include <string.h>
 42 | #include <ctype.h>
 43 | #include "port.h"
 44 | #include "rangecod.h"
 45 | /* Write the synopsis and the credit line to stderr, then terminate with exit status 1. */
 46 | void usage()
 47 | {   fputs("comp [inputfile [outputfile]]\n", stderr);
 48 |     fputs("comp (c)1997.1998 Michael Schindler, michael@compressconsult\n", stderr);
 49 |     exit(1);
 50 | }
 51 | /* Read little-endian 32-bit values from the input and range-code each one bit by bit. */
 52 | int main( int argc, char *argv[] )
 53 | {   int ch1,ch2,ch3,ch4;
 54 |     rangecoder rc;
 55 |     /* One adaptive state per coded position.  A 32-bit value takes at most 32  */
 56 |     /* rounds of one data bit plus one flag bit, i.e. 64 positions; the former  */
 57 |     /* 48-entry table was overrun by values of 2^25-2 and above.                */
 58 |     enum { LEN_CONTEXTS = 64 };
 59 |     int prop[LEN_CONTEXTS];
 60 |     int j;
 61 | 
 62 |     /* argv[1] exists only when argc>1; testing argc>0 dereferenced NULL with no arguments */
 63 |     if ((argc > 3) || ((argc>1) && (argv[1][0]=='-')))
 64 |         usage();
 65 | 
 66 |     if ( argc<2 )
 67 |         fprintf( stderr, "stdin" );
 68 |     else
 69 |     {   if (!freopen( argv[1], "rb", stdin ))
 70 |         {   perror( argv[1] );
 71 |             return 1;
 72 |         }
 73 |         fprintf( stderr, "%s", argv[1] );
 74 |     }
 75 |     if ( argc<3 )
 76 |         fprintf( stderr, " to stdout\n" );
 77 |     else
 78 |     {   if (!freopen( argv[2], "wb", stdout ))
 79 |         {   perror( argv[2] );
 80 |             return 1;
 81 |         }
 82 |         fprintf( stderr, " to %s\n", argv[2] );
 83 |     }
 84 |     fprintf( stderr, "%s\n", coderversion);
 85 | 
 86 | #ifndef unix
 87 |     setmode( fileno( stdin ), O_BINARY );
 88 |     setmode( fileno( stdout ), O_BINARY );
 89 | #endif
 90 | 
 91 |     /* every position starts at 32768 */
 92 |     for(j=0;j<LEN_CONTEXTS;j++)
 93 |         prop[j]=32768;
 94 | 
 95 |     start_encoding(&rc,0,0);
 96 | 
 97 |     /* do the coding */
 98 |     while (1)
 99 |     {
100 |         uint4 len;      /* unsigned, so a top byte >= 0x80 cannot make the value negative */
101 |         int i=0;        /* index of the next state in prop[] */
102 | 
103 |         /* a trailing group of fewer than four bytes is dropped */
104 |         if ((ch1=getc(stdin))==EOF) break;
105 |         if ((ch2=getc(stdin))==EOF) break;
106 |         if ((ch3=getc(stdin))==EOF) break;
107 |         if ((ch4=getc(stdin))==EOF) break;
108 |         len = ch4; len<<=8;
109 |         len += ch3; len<<=8;
110 |         len += ch2; len<<=8;
111 |         len += ch1;
112 | 
113 |         for(;;)
114 |         {   /* lowest remaining bit of the value */
115 |             encbit(&rc,len&1,prop+i);
116 |             i++;
117 |             len>>=1;
118 |             if (len==0)
119 |             {   /* flag 1: the value is finished */
120 |                 encbit(&rc,1,prop+i);
121 |                 break;
122 |             }
123 |             /* flag 0: more bits follow; the remainder is coded minus one */
124 |             encbit(&rc,0,prop+i);
125 |             i++;
126 |             len--;
127 |         }
128 |     }
129 | 
130 |     done_encoding(&rc);
131 | 
132 |     return 0;
133 | }
134 | 


--------------------------------------------------------------------------------
/ari/complit.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #ifndef unix
 4 | #include <io.h>
 5 | #include <fcntl.h>
 6 | #endif
 7 | #include <string.h>
 8 | #include <ctype.h>
 9 | #include "port.h"
10 | #include "rangecod.h"
11 | /* Write the synopsis and the credit line to stderr, then terminate with exit status 1. */
12 | void usage()
13 | {   fputs("comp [inputfile [outputfile]]\n", stderr);
14 |     fputs("comp (c)1997.1998 Michael Schindler, michael@compressconsult\n", stderr);
15 |     exit(1);
16 | }
17 | /* Range-code every input byte, highest bit first, picking the bit state from the bits already coded. */
18 | int main( int argc, char *argv[] )
19 | {   int ch;
20 |     rangecoder rc;
21 |     int prop[256];      /* indexed by ctx: a leading 1 followed by the bits coded so far (1..255) */
22 |     int j;
23 | 
24 |     /* argv[1] exists only when argc>1; testing argc>0 dereferenced NULL with no arguments */
25 |     if ((argc > 3) || ((argc>1) && (argv[1][0]=='-')))
26 |         usage();
27 | 
28 |     if ( argc<2 )
29 |         fprintf( stderr, "stdin" );
30 |     else
31 |     {   if (!freopen( argv[1], "rb", stdin ))
32 |         {   perror( argv[1] ); return 1; }
33 |         fprintf( stderr, "%s", argv[1] );
34 |     }
35 |     if ( argc<3 )
36 |         fprintf( stderr, " to stdout\n" );
37 |     else
38 |     {   if (!freopen( argv[2], "wb", stdout ))
39 |         {   perror( argv[2] ); return 1; }
40 |         fprintf( stderr, " to %s\n", argv[2] );
41 |     }
42 |     fprintf( stderr, "%s\n", coderversion);
43 | 
44 | #ifndef unix
45 |     setmode( fileno( stdin ), O_BINARY );
46 |     setmode( fileno( stdout ), O_BINARY );
47 | #endif
48 | 
49 |     for(j=0;j<256;j++) prop[j]=32768;
50 | 
51 |     start_encoding(&rc,0,0);
52 |     /* do the coding */
53 |     while ((ch=getc(stdin))!=EOF)
54 |     {   unsigned char len = ch;     /* bits still to code; the next one sits in bit 7 */
55 |         int ctx=1;
56 | 
57 |         while (ctx<256)
58 |         {   encbit(&rc,len>>7,prop+ctx);
59 |             ctx+=ctx+(len>>7);      /* append the coded bit to the context */
60 |             len+=len;               /* shift left; the 8-bit type drops the coded bit */
61 |         }
62 |     }
63 |     done_encoding(&rc);
64 | 
65 |     return 0;
66 | }
67 | 


--------------------------------------------------------------------------------
/ari/port.h:
--------------------------------------------------------------------------------
 1 | #ifndef port_h
 2 | #define port_h
 3 | #include <limits.h>
 4 | 
/* Inline: spelling of the inline keyword.  The build must define GCC to  */
/* get plain `inline`; otherwise the `__inline` spelling is used.         */
#ifdef GCC
#define Inline inline
#else
#define Inline __inline
#endif

/* Pick the integer types by the width of int: when int is wider than     */
/* 16 bits, short/int are used; otherwise int/long are used instead.      */
#if INT_MAX > 0x7FFF
typedef unsigned short uint2;  /* two-byte integer (large arrays)      */
typedef unsigned int   uint4;  /* four-byte integers (range needed)    */
#else
typedef unsigned int   uint2;
typedef unsigned long  uint4;
#endif

typedef unsigned int uint;     /* fast unsigned integer, 2 or 4 bytes  */
20 | 
21 | #endif
22 | 


--------------------------------------------------------------------------------
/ari/rangecod.c:
--------------------------------------------------------------------------------
  1 | #define NOWARN
  2 | 
  3 | /*
  4 |   define EXTRAFAST for increased speed; you lose compression and
  5 |   compatibility in exchange.
  6 | */
  7 | //#define EXTRAFAST 
  8 | 
#include <stdio.h>
#include <stdlib.h>
#include "port.h"
#include "rangecod.h"
 12 | 
 13 | void encbit(rangecoder *rc,int bit, int *prop)
 14 | {
 15 |         int prob=(*prop)>>3;
 16 | 	
 17 | 	int x=6;
 18 | 
 19 | 	if (bit) {
 20 |         encode_shift(rc,8192-prob,prob,13);
 21 | 	*prop-=(*prop)>>x;
 22 | 	} else {
 23 |         encode_shift(rc,prob,0,13);
 24 | 	*prop+=(65536-(*prop))>>x;
 25 | 	}
 26 | 	*prop&=~1;
 27 | 	*prop|=bit;
 28 | }
/* SIZE OF RANGE ENCODING CODE VALUES. */

/* CODE_BITS is the width of a code value; Top_value has only bit          */
/* CODE_BITS-1 set (1<<31 with CODE_BITS == 32).                           */
#define CODE_BITS 32
#define Top_value ((code_value)1 << (CODE_BITS-1))


/* all IO is done by these macros - change them if you want to */
/* no checking is done - do it here if you want it             */
/* cod is a pointer to the used rangecoder                     */
/* As defined here the cod argument is ignored: output goes to stdout via  */
/* putchar() and input comes from stdin via getchar().                     */
#define outbyte(cod,x) putchar(x)
#define inbyte(cod)    getchar()
 40 | 
 41 | 
/* With RENORM95 defined the renormalisation code is taken from            */
/* renorm95.c (not visible here; presumably it also supplies the           */
/* definitions made in the #else branch below - confirm).                  */
#ifdef RENORM95
#include "renorm95.c"

#else
/* With CODE_BITS == 32: SHIFT_BITS is 23, EXTRA_BITS is 7 and             */
/* Bottom_value is 1<<23.                                                  */
#define SHIFT_BITS (CODE_BITS - 9)
#define EXTRA_BITS ((CODE_BITS-2) % 8 + 1)
#define Bottom_value (Top_value >> 8)

/* coderversion: version string, chosen by the NOWARN and                  */
/* GLOBALRANGECODER build options.                                         */
#ifdef NOWARN
#ifdef GLOBALRANGECODER
char coderversion[]="rangecoder 1.3 NOWARN GLOBAL (c) 1997-2000 Michael Schindler";
#else
char coderversion[]="rangecoder 1.3 NOWARN (c) 1997-2000 Michael Schindler";
#endif
#else    /*NOWARN*/
#ifdef GLOBALRANGECODER
char coderversion[]="rangecoder 1.3 GLOBAL (c) 1997-2000 Michael Schindler";
#else
char coderversion[]="rangecoder 1.3 (c) 1997-2000 Michael Schindler";
#endif
#endif   /*NOWARN*/
#endif   /*RENORM95*/
 64 | 
 65 | 
/* RNGC names the coder state used by every function in this file;         */
/* M_outbyte/M_inbyte forward to the outbyte/inbyte IO macros.             */
#ifdef GLOBALRANGECODER
/* if this is defined we'll make a global variable rngc and    */
/* make RNGC use that var; we'll also omit unneeded parameters */
static rangecoder rngc;
#define RNGC (rngc)
#define M_outbyte(a) outbyte(&rngc,a)
#define M_inbyte inbyte(&rngc)
/* the normalize helpers drop their rc argument in this mode */
#define enc_normalize(rc) M_enc_normalize()
#define dec_normalize(rc) M_dec_normalize()
#else
/* per-call coder: RNGC is the object the local parameter rc points to */
#define RNGC (*rc)
#define M_outbyte(a) outbyte(rc,a)
#define M_inbyte inbyte(rc)
#endif
 80 | 
 81 | 
 82 | /* rc is the range coder to be used                            */
 83 | /* c is written as first byte in the datastream                */
 84 | /* one could do without c, but then you have an additional if  */
 85 | /* per outputbyte.                                             */
 86 | void start_encoding( rangecoder *rc, char c, int initlength )
 87 | {   RNGC.low = 0;                /* Full code range */
 88 |     RNGC.range = Top_value;
 89 |     RNGC.buffer = c;
 90 |     RNGC.help = 0;               /* No bytes to follow */
 91 |     RNGC.bytecount = initlength;
 92 | }
 93 | 
 94 | 
 95 | #ifndef RENORM95
 96 | /* I do the normalization before I need a defined state instead of */
 97 | /* after messing it up. This simplifies starting and ending.       */
 98 | static Inline void enc_normalize( rangecoder *rc )
 99 | {   while(RNGC.range <= Bottom_value)     /* do we need renormalisation?  */
100 |     {   if (RNGC.low < (code_value)0xff<<SHIFT_BITS)  /* no carry possible --> output */
101 |         {   M_outbyte(RNGC.buffer);
102 |             for(; RNGC.help; RNGC.help--)
103 |                 M_outbyte(0xff);
104 |             RNGC.buffer = (unsigned char)(RNGC.low >> SHIFT_BITS);
105 |         } else if (RNGC.low & Top_value) /* carry now, no future carry */
106 |         {   M_outbyte(RNGC.buffer+1);
107 |             for(; RNGC.help; RNGC.help--)
108 |                 M_outbyte(0);
109 |             RNGC.buffer = (unsigned char)(RNGC.low >> SHIFT_BITS);
110 |         } else                           /* passes on a potential carry */
111 | #ifdef NOWARN
112 |             RNGC.help++;
113 | #else
114 |             if (RNGC.bytestofollow++ == 0xffffffffL)
115 |             {   fprintf(stderr,"Too many bytes outstanding - File too large\n");
116 |                 exit(1);
117 |             }
118 | #endif
119 |         RNGC.range <<= 8;
120 |         RNGC.low = (RNGC.low<<8) & (Top_value-1);
121 |         RNGC.bytecount++;
122 |     }
123 | }
124 | #endif
125 | 
126 | 
127 | /* Encode a symbol using frequencies                         */
128 | /* rc is the range coder to be used                          */
129 | /* sy_f is the interval length (frequency of the symbol)     */
130 | /* lt_f is the lower end (frequency sum of < symbols)        */
131 | /* tot_f is the total interval length (total frequency sum)  */
132 | /* or (faster): tot_f = (code_value)1<<shift                             */
133 | void encode_freq( rangecoder *rc, freq sy_f, freq lt_f, freq tot_f )
134 | {	code_value r, tmp;
135 | 	enc_normalize( rc );
136 | 	r = RNGC.range / tot_f;
137 | 	tmp = r * lt_f;
138 | 	RNGC.low += tmp;
139 | #ifdef EXTRAFAST
140 |     RNGC.range = r * sy_f;
141 | #else
142 |     if (lt_f+sy_f < tot_f)
143 | 		RNGC.range = r * sy_f;
144 |     else
145 | 		RNGC.range -= tmp;
146 | #endif
147 | }
148 | 
149 | void encode_shift( rangecoder *rc, freq sy_f, freq lt_f, freq shift )
150 | {	code_value r, tmp;
151 | 	enc_normalize( rc );
152 | 	r = RNGC.range >> shift;
153 | 	tmp = r * lt_f;
154 | 	RNGC.low += tmp;
155 | #ifdef EXTRAFAST
156 | 	RNGC.range = r * sy_f;
157 | #else
158 | 	if ((lt_f+sy_f) >> shift)
159 | 		RNGC.range -= tmp;
160 | 	else  
161 | 		RNGC.range = r * sy_f;
162 | #endif
163 | }
164 | 
165 | 
166 | #ifndef RENORM95
167 | /* Finish encoding                                           */
168 | /* rc is the range coder to be used                          */
169 | /* actually not that many bytes need to be output, but who   */
170 | /* cares. I output them because decode will read them :)     */
171 | /* the return value is the number of bytes written           */
172 | uint4 done_encoding( rangecoder *rc )
173 | {   uint tmp;
174 |     enc_normalize(rc);     /* now we have a normalized state */
175 |     RNGC.bytecount += 5;
176 |     if ((RNGC.low & (Bottom_value-1)) < ((RNGC.bytecount&0xffffffL)>>1))
177 |        tmp = RNGC.low >> SHIFT_BITS;
178 |     else
179 |        tmp = (RNGC.low >> SHIFT_BITS) + 1;
180 |     if (tmp > 0xff) /* we have a carry */
181 |     {   M_outbyte(RNGC.buffer+1);
182 |         for(; RNGC.help; RNGC.help--)
183 |             M_outbyte(0);
184 |     } else  /* no carry */
185 |     {   M_outbyte(RNGC.buffer);
186 |         for(; RNGC.help; RNGC.help--)
187 |             M_outbyte(0xff);
188 |     }
189 |     M_outbyte(tmp & 0xff);
190 |     M_outbyte((RNGC.bytecount>>16) & 0xff);
191 |     M_outbyte((RNGC.bytecount>>8) & 0xff);
192 |     M_outbyte(RNGC.bytecount & 0xff);
193 |     return RNGC.bytecount;
194 | }
195 | 
196 | 
197 | /* Start the decoder                                         */
198 | /* rc is the range coder to be used                          */
199 | /* returns the char from start_encoding or EOF               */
200 | int start_decoding( rangecoder *rc )
201 | {   int c = M_inbyte;
202 |     if (c==EOF)
203 |         return EOF;
204 |     RNGC.buffer = M_inbyte;
205 |     RNGC.low = RNGC.buffer >> (8-EXTRA_BITS);
206 |     RNGC.range = (code_value)1 << EXTRA_BITS;
207 |     return c;
208 | }
209 | 
210 | 
211 | static Inline void dec_normalize( rangecoder *rc )
212 | {   while (RNGC.range <= Bottom_value)
213 |     {   RNGC.low = (RNGC.low<<8) | ((RNGC.buffer<<EXTRA_BITS)&0xff);
214 |         RNGC.buffer = M_inbyte;
215 |         RNGC.low |= RNGC.buffer >> (8-EXTRA_BITS);
216 |         RNGC.range <<= 8;
217 |     }
218 | }
219 | #endif
220 | 
221 | 
222 | /* Calculate culmulative frequency for next symbol. Does NO update!*/
223 | /* rc is the range coder to be used                          */
224 | /* tot_f is the total frequency                              */
225 | /* or: totf is (code_value)1<<shift                                      */
226 | /* returns the culmulative frequency                         */
227 | freq decode_culfreq( rangecoder *rc, freq tot_f )
228 | {   freq tmp;
229 |     dec_normalize(rc);
230 |     RNGC.help = RNGC.range/tot_f;
231 |     tmp = RNGC.low/RNGC.help;
232 | #ifdef EXTRAFAST
233 |     return tmp;
234 | #else
235 |     return (tmp>=tot_f ? tot_f-1 : tmp);
236 | #endif
237 | }
238 | 
239 | freq decode_culshift( rangecoder *rc, freq shift )
240 | {   freq tmp;
241 |     dec_normalize(rc);
242 |     RNGC.help = RNGC.range>>shift;
243 |     tmp = RNGC.low/RNGC.help;
244 | #ifdef EXTRAFAST
245 |     return tmp;
246 | #else
247 |     return (tmp>>shift ? ((code_value)1<<shift)-1 : tmp);
248 | #endif
249 | }
250 | 
251 | 
252 | /* Update decoding state                                     */
253 | /* rc is the range coder to be used                          */
254 | /* sy_f is the interval length (frequency of the symbol)     */
255 | /* lt_f is the lower end (frequency sum of < symbols)        */
256 | /* tot_f is the total interval length (total frequency sum)  */
257 | void decode_update( rangecoder *rc, freq sy_f, freq lt_f, freq tot_f)
258 | {   code_value tmp;
259 |     tmp = RNGC.help * lt_f;
260 |     RNGC.low -= tmp;
261 | #ifdef EXTRAFAST
262 |     RNGC.range = RNGC.help * sy_f;
263 | #else
264 |     if (lt_f + sy_f < tot_f)
265 |         RNGC.range = RNGC.help * sy_f;
266 |     else
267 |         RNGC.range -= tmp;
268 | #endif
269 | }
270 | 
271 | 
272 | /* Decode a byte/short without modelling                     */
273 | /* rc is the range coder to be used                          */
274 | unsigned char decode_byte(rangecoder *rc)
275 | {   unsigned char tmp = decode_culshift(rc,8);
276 |     decode_update( rc,1,tmp,(freq)1<<8);
277 |     return tmp;
278 | }
279 | 
280 | unsigned short decode_short(rangecoder *rc)
281 | {   unsigned short tmp = decode_culshift(rc,16);
282 |     decode_update( rc,1,tmp,(freq)1<<16);
283 |     return tmp;
284 | }
285 | 
286 | 
/* Finish decoding                                           */
/* rc is the range coder to be used                          */
/* Only runs dec_normalize(), which reads further input      */
/* bytes while range is at or below Bottom_value.            */
void done_decoding( rangecoder *rc )
{   dec_normalize(rc);      /* normalize to use up all bytes */
}
292 | 


--------------------------------------------------------------------------------
/ari/rangecod.h:
--------------------------------------------------------------------------------
  1 | #ifndef rangecod_h
  2 | #define rangecod_h
  3 | 
  4 | /*
  5 |   rangecod.h     headerfile for range encoding
  6 | 
  7 |   (c) Michael Schindler
  8 |   1997, 1998, 1999, 2000
  9 |   http://www.compressconsult.com/
 10 |   michael@compressconsult.com
 11 | 
 12 |   This program is free software; you can redistribute it and/or modify
 13 |   it under the terms of the GNU General Public License as published by
 14 |   the Free Software Foundation; either version 2 of the License, or
 15 |   (at your option) any later version.
 16 | 
 17 |   This program is distributed in the hope that it will be useful,
 18 |   but WITHOUT ANY WARRANTY; without even the implied warranty of
 19 |   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 20 |   GNU General Public License for more details.  It may be that this
 21 |   program violates local patents in your country, however it is
 22 |   believed (NO WARRANTY!) to be patent-free here in Austria. Glen
 23 |   Langdon also confirmed my opinion that IBM UK did not protect that
 24 |   method.
 25 | 
 26 |   You should have received a copy of the GNU General Public License
 27 |   along with this program; if not, write to the Free Software
 28 |   Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 29 |   MA 02111-1307, USA.
 30 | 
 31 |   Range encoding is based on an article by G.N.N. Martin, submitted
 32 |   March 1979 and presented on the Video & Data Recording Conference,
 33 |   Southampton, July 24-27, 1979. If anyone can name the original
 34 |   copyright holder of that article please contact me; this might
 35 |   allow me to make that article available on the net for general
 36 |   public.
 37 | 
 38 |   Range coding is closely related to arithmetic coding, except that
 39 |   it does renormalisation in larger units than bits and is thus
 40 |   faster. An earlier version of this code was distributed as byte
 41 |   oriented arithmetic coding, but then I had no knowledge of Martin's
 42 |   paper from 1979.
 43 | 
 44 |   The input and output is done by the INBYTE and OUTBYTE macros
 45 |   defined in the .c file; change them as needed; the first parameter
 46 |   passed to them is a pointer to the rangecoder structure; extend that
 47 |   structure as needed (and don't forget to initialize the values in
 48 |   start_encoding resp. start_decoding). This distribution writes to
 49 |   stdout and reads from stdin.
 50 | 
 51 |   There are no global or static variables, so if the IO is thread safe the
 52 |   whole rangecoder is too - unless GLOBALRANGECODER in rangecod.h is defined.
 53 | 
 54 |   For error recovery the last 3 bytes written contain the total number
 55 |   of bytes written since starting the encoder. This can be used to
 56 |   locate the beginning of a block if you have only the end.
 57 | 
 58 |   For some applications, using a global coder variable may provide better
 59 |   performance. This will allow you to use only one coder at a time and
 60 |   will destroy thread safety. To enable this feature uncomment the
 61 |   #define GLOBALRANGECODER line below.
 62 | */
 63 | /* #define GLOBALRANGECODER */
 64 | 
 65 | 
 66 | #include "port.h"
 67 | #if 0    /* done in port.h */
 68 | #include <limits.h>
 69 | #if INT_MAX > 0xffff
 70 | typedef unsigned int uint4;
 71 | typedef unsigned short uint2;
 72 | #else
 73 | typedef unsigned long uint4;
 74 | typedef unsigned int uint2;
 75 | #endif
 76 | #endif
 77 | 
 78 | extern char coderversion[];
 79 | 
 80 | typedef uint4 code_value;       /* Type of an rangecode value       */
 81 |                                 /* must accommodate 32 bits         */
 82 | /* it is highly recommended that the total frequency count is less  */
 83 | /* than 1 << 19 to minimize rounding effects.                       */
 84 | /* the total frequency count MUST be less than 1<<23                */
 85 | 
 86 | typedef uint4 freq; 
 87 | 
 88 | /* make the following private in the arithcoder object in C++	    */
 89 | 
/* State of one range coder.  The same structure is used for encoding   */
/* and for decoding; bytecount is only maintained by the encoder.       */
typedef struct {
    uint4 low,           /* low end of interval */
          range,         /* length of interval */
          help;          /* bytes_to_follow resp. intermediate value */
    unsigned char buffer;/* buffer for input/output */
/* the following is used only when encoding */
    uint4 bytecount;     /* counter for outputed bytes  */
/* insert fields you need for input/output below this line! */
} rangecoder;
 99 | 
100 | 
/* Encode one bit with an adaptive probability               */
/* rc is the range coder to be used                          */
/* bit is the value to encode                                */
/* prop points to the probability state, which is updated    */
void encbit(rangecoder *rc, int bit, int *prop);
/* supply the following as methods of the arithcoder object  */
/* omit the first parameter then (C++)                       */
/* With GLOBALRANGECODER every API name becomes a macro that */
/* drops the rc argument and expands to an M_-prefixed name. */
#ifdef GLOBALRANGECODER
#define start_encoding(rc,a,b) M_start_encoding(a,b)
#define encode_freq(rc,a,b,c) M_encode_freq(a,b,c)
#define encode_shift(rc,a,b,c) M_encode_shift(a,b,c)
#define done_encoding(rc) M_done_encoding()
#define start_decoding(rc) M_start_decoding()
#define decode_culfreq(rc,a) M_decode_culfreq(a)
#define decode_culshift(rc,a) M_decode_culshift(a)
#define decode_update(rc,a,b,c) M_decode_update(a,b,c)
#define decode_byte(rc) M_decode_byte()
#define decode_short(rc) M_decode_short()
#define done_decoding(rc) M_done_decoding()
#endif
117 | 
118 | 
119 | /* Start the encoder                                         */
120 | /* rc is the range coder to be used                          */
121 | /* c is written as first byte in the datastream (header,...) */
122 | void start_encoding( rangecoder *rc, char c, int initlength);
123 | 
124 | 
125 | /* Encode a symbol using frequencies                         */
126 | /* rc is the range coder to be used                          */
127 | /* sy_f is the interval length (frequency of the symbol)     */
128 | /* lt_f is the lower end (frequency sum of < symbols)        */
129 | /* tot_f is the total interval length (total frequency sum)  */
130 | /* or (a lot faster): tot_f = 1<<shift                       */
131 | void encode_freq( rangecoder *rc, freq sy_f, freq lt_f, freq tot_f );
132 | void encode_shift( rangecoder *rc, freq sy_f, freq lt_f, freq shift );
133 | 
134 | /* Encode a byte/short without modelling                     */
135 | /* rc is the range coder to be used                          */
136 | /* b,s is the data to be encoded                             */
137 | #define encode_byte(ac,b)  encode_shift(ac,(freq)1,(freq)(b),(freq)8)
138 | #define encode_short(ac,s) encode_shift(ac,(freq)1,(freq)(s),(freq)16)
139 | 
140 | 
141 | /* Finish encoding                                           */
142 | /* rc is the range coder to be shut down                     */
143 | /* returns number of bytes written                           */
144 | uint4 done_encoding( rangecoder *rc );
145 | 
146 | 
147 | 
148 | /* Start the decoder                                         */
149 | /* rc is the range coder to be used                          */
150 | /* returns the char from start_encoding or EOF               */
151 | int start_decoding( rangecoder *rc );
152 | 
153 | /* Calculate cumulative frequency for next symbol. Does NO update! */
154 | /* rc is the range coder to be used                          */
155 | /* tot_f is the total frequency                              */
156 | /* or: totf is 1<<shift                                      */
157 | /* returns the <= cumulative frequency                       */
158 | freq decode_culfreq( rangecoder *rc, freq tot_f );
159 | freq decode_culshift( rangecoder *ac, freq shift );
160 | 
161 | /* Update decoding state                                     */
162 | /* rc is the range coder to be used                          */
163 | /* sy_f is the interval length (frequency of the symbol)     */
164 | /* lt_f is the lower end (frequency sum of < symbols)        */
165 | /* tot_f is the total interval length (total frequency sum)  */
166 | void decode_update( rangecoder *rc, freq sy_f, freq lt_f, freq tot_f);
167 | #define decode_update_shift(rc,f1,f2,f3) decode_update((rc),(f1),(f2),(freq)1<<(f3));
168 | 
169 | /* Decode a byte/short without modelling                     */
170 | /* rc is the range coder to be used                          */
171 | unsigned char decode_byte(rangecoder *rc);
172 | unsigned short decode_short(rangecoder *rc);
173 | 
174 | 
175 | /* Finish decoding                                           */
176 | /* rc is the range coder to be used                          */
177 | void done_decoding( rangecoder *rc );
178 | 
179 | #endif
180 | 


--------------------------------------------------------------------------------
/bpe.h:
--------------------------------------------------------------------------------
  1 | #include <string.h>
  2 | #include <stdint.h>
  3 | 
  4 | uint8_t bpe_flags[8192];
  5 | 
  6 | static inline void set_bpe(uint8_t a,uint8_t b)
  7 | {
  8 |   int ab=a;
  9 |   ab<<=5;
 10 |   ab+=b>>3;
 11 |   bpe_flags[ab]|=(1<<(b&7));
 12 | }
 13 | 
 14 | static inline void unset_bpe(uint8_t a,uint8_t b)
 15 | {
 16 |   int ab=a;
 17 |   ab<<=5;
 18 |   ab+=b>>3;
 19 |   bpe_flags[ab]&=~(1<<(b&7));
 20 | }
 21 | 
 22 | static inline int has_bpe(uint8_t a,uint8_t b)
 23 | {
 24 |   int ab=a;
 25 |   ab<<=5;
 26 |   ab+=b>>3;
 27 |   return bpe_flags[ab]&(1<<(b&7));
 28 | }
 29 | 
/* Capacity of the ring of remembered byte pairs. */
#define BPE 1024
int bpe_last_ofs[BPE]; /* buffer offset of each remembered pair */
int bpe_num;           /* number of used ring entries, at most BPE */
int bpe_head;          /* ring slot that the next push will write */
 34 | 
 35 | void bpe_init() {
 36 |   bpe_num=0;
 37 |   bpe_head=0;
 38 |   memset(bpe_flags,0,8192);
 39 | }
 40 | 
 41 | void bpe_push(uint8_t *buf, int pos)
 42 | {
 43 |   if (pos<2) return;
 44 |   uint8_t a=buf[pos-2];
 45 |   uint8_t b=buf[pos-1];
 46 |   if (has_bpe(a,b)) {
 47 |     return;
 48 |   }
 49 |   if (bpe_num==BPE) {
 50 |     int prev_pos=bpe_last_ofs[bpe_head];
 51 |     uint8_t pa=buf[prev_pos];
 52 |     uint8_t pb=buf[prev_pos+1];
 53 |     unset_bpe(pa,pb);
 54 |   }
 55 |   bpe_last_ofs[bpe_head++]=pos-2;
 56 |   if (bpe_head==BPE) bpe_head=0;
 57 |   if (bpe_num<BPE) bpe_num++;
 58 | 
 59 |   set_bpe(a,b);
 60 | }
 61 | 
 62 | int find_bpes(uint8_t *buf, int n, int *offsets, int *rofs, int *totals)
 63 | {
 64 |   int bpe_index[256][256];
 65 |   int i;
 66 |   int cnt=0;
 67 | 
 68 |   bpe_init();
 69 |   offsets[0]=-1;
 70 |   rofs[0]=-1;
 71 |   totals[0]=0;
 72 |   offsets[1]=-1;
 73 |   rofs[1]=-1;
 74 |   totals[1]=1;
 75 |   if (buf[1]==buf[0]&&buf[2]==buf[1]) {
 76 |     offsets[1]=0;
 77 |     rofs[1]=0;
 78 |     totals[1]=1;
 79 |     cnt++;
 80 |   }
 81 |   for(i=2;i<n-1;i++) {
 82 |     int cur_head=bpe_head;
 83 |     bpe_push(buf,i);
 84 |     if (bpe_head!=cur_head)
 85 |       bpe_index[buf[i-2]][buf[i-1]]=cur_head;
 86 |     totals[i]=bpe_num+1;
 87 |     if (buf[i]==buf[i-1]&&buf[i]==buf[i+1]) {
 88 |       offsets[i]=i-1;
 89 |       rofs[i]=0;
 90 |       cnt++;
 91 |       continue;
 92 |     }
 93 |     if (has_bpe(buf[i],buf[i+1])) {
 94 |       int index=bpe_index[buf[i]][buf[i+1]];
 95 |       offsets[i]=bpe_last_ofs[index];
 96 |       index=bpe_head-index;
 97 |       if (index<0) index+=BPE;
 98 |       rofs[i]=index+1;
 99 |       cnt++;
100 |       continue;
101 |     }
102 |     offsets[i]=-1;
103 |     rofs[i]=-1;
104 |   }
105 |   return cnt;
106 | }
107 | 
/* Count the positions of buf that have a cheap two-byte match: either
 * the byte repeats both its predecessor and its successor, or the pair
 * starting at the position is currently flagged in the pair tracker.
 *
 * buf  input bytes
 * n    number of bytes in buf; inputs shorter than three bytes yield 0
 *      without reading past the end of buf
 *
 * Resets and uses the global pair tracker.
 * Returns the number of matching positions.
 */
int cnt_bpes(uint8_t *buf, int n)
{
  int i;
  int cnt=0;

  bpe_init();
  /* buf[2] only exists for n >= 3 */
  if (n>=3&&buf[1]==buf[0]&&buf[2]==buf[1]) {
    cnt++;
  }
  for(i=2;i<n-1;i++) {
    bpe_push(buf,i);
    if ((buf[i]==buf[i-1]&&buf[i]==buf[i+1]) || has_bpe(buf[i],buf[i+1])) {
      cnt++;
    }
  }
  return cnt;
}
134 | 


--------------------------------------------------------------------------------
/divsufsort.c:
--------------------------------------------------------------------------------
   1 | /*
   2 |  * divsufsort.c for libdivsufsort-lite
   3 |  * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
   4 |  *
   5 |  * Permission is hereby granted, free of charge, to any person
   6 |  * obtaining a copy of this software and associated documentation
   7 |  * files (the "Software"), to deal in the Software without
   8 |  * restriction, including without limitation the rights to use,
   9 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 |  * copies of the Software, and to permit persons to whom the
  11 |  * Software is furnished to do so, subject to the following
  12 |  * conditions:
  13 |  *
  14 |  * The above copyright notice and this permission notice shall be
  15 |  * included in all copies or substantial portions of the Software.
  16 |  *
  17 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  18 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  19 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  20 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  21 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  22 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  23 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  24 |  * OTHER DEALINGS IN THE SOFTWARE.
  25 |  */
  26 | 
  27 | #include <stdio.h>
  28 | #include <stdlib.h>
  29 | #ifdef _OPENMP
  30 | # include <omp.h>
  31 | #endif
  32 | #include "divsufsort.h"
  33 | 
  34 | 
/*- Constants -*/
/* Each tunable below may be predefined by the build; values outside the
   accepted range are replaced, and missing ones get a default. */
#define INLINE __inline
/* ALPHABET_SIZE: a predefined value below 1 is discarded; default 256. */
#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
# undef ALPHABET_SIZE
#endif
#if !defined(ALPHABET_SIZE)
# define ALPHABET_SIZE (256)
#endif
/* Bucket A has one entry per symbol, bucket B one per symbol pair. */
#define BUCKET_A_SIZE (ALPHABET_SIZE)
#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
/* SS_INSERTIONSORT_THRESHOLD: raised to 1 if smaller; default 8. */
#if defined(SS_INSERTIONSORT_THRESHOLD)
# if SS_INSERTIONSORT_THRESHOLD < 1
#  undef SS_INSERTIONSORT_THRESHOLD
#  define SS_INSERTIONSORT_THRESHOLD (1)
# endif
#else
# define SS_INSERTIONSORT_THRESHOLD (8)
#endif
/* SS_BLOCKSIZE: clamped to the range 0..32767; default 1024. */
#if defined(SS_BLOCKSIZE)
# if SS_BLOCKSIZE < 0
#  undef SS_BLOCKSIZE
#  define SS_BLOCKSIZE (0)
# elif 32768 <= SS_BLOCKSIZE
#  undef SS_BLOCKSIZE
#  define SS_BLOCKSIZE (32767)
# endif
#else
# define SS_BLOCKSIZE (1024)
#endif
/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
#if SS_BLOCKSIZE == 0
# define SS_MISORT_STACKSIZE (96)
#elif SS_BLOCKSIZE <= 4096
# define SS_MISORT_STACKSIZE (16)
#else
# define SS_MISORT_STACKSIZE (24)
#endif
#define SS_SMERGE_STACKSIZE (32)
#define TR_INSERTIONSORT_THRESHOLD (8)
#define TR_STACKSIZE (64)
  75 | 
  76 | 
/*- Macros -*/
/* SWAP exchanges two lvalues through a variable named t, which must be
   declared by the code that uses the macro. */
#ifndef SWAP
# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
#endif /* SWAP */
/* MIN and MAX evaluate one of their arguments twice. */
#ifndef MIN
# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
#endif /* MIN */
#ifndef MAX
# define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
#endif /* MAX */
/* The STACK_* macros operate on an array named stack and a counter named
   ssize, both of which must exist in the scope where they are used.
   No bounds check is done on push. */
#define STACK_PUSH(_a, _b, _c, _d)\
  do {\
    stack[ssize].a = (_a), stack[ssize].b = (_b),\
    stack[ssize].c = (_c), stack[ssize++].d = (_d);\
  } while(0)
#define STACK_PUSH5(_a, _b, _c, _d, _e)\
  do {\
    stack[ssize].a = (_a), stack[ssize].b = (_b),\
    stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
  } while(0)
/* Popping from an empty stack executes a bare `return;` in the enclosing
   function, so these two macros are only usable inside void functions. */
#define STACK_POP(_a, _b, _c, _d)\
  do {\
    if(ssize == 0) { return; }\
    (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
    (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
  } while(0)
#define STACK_POP5(_a, _b, _c, _d, _e)\
  do {\
    if(ssize == 0) { return; }\
    (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
    (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
  } while(0)
/* Bucket accessors; they index arrays named bucket_A and bucket_B in the
   using scope.  BUCKET_B and BUCKET_BSTAR address the same array with the
   two symbols in swapped roles. */
#define BUCKET_A(_c0) bucket_A[(_c0)]
#if ALPHABET_SIZE == 256
#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
#else
#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
#endif
 117 | 
 118 | 
 119 | /*- Private Functions -*/
 120 | 
/* lg_table[v] is the position of the highest set bit of the 8-bit value v
   (floor of log2(v)); lg_table[0] is -1. */
static const int lg_table[256]= {
 -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
};
 131 | 
 132 | #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
 133 | 
 134 | static INLINE
 135 | int
 136 | ss_ilg(int n) {
 137 | #if SS_BLOCKSIZE == 0
 138 |   return (n & 0xffff0000) ?
 139 |           ((n & 0xff000000) ?
 140 |             24 + lg_table[(n >> 24) & 0xff] :
 141 |             16 + lg_table[(n >> 16) & 0xff]) :
 142 |           ((n & 0x0000ff00) ?
 143 |              8 + lg_table[(n >>  8) & 0xff] :
 144 |              0 + lg_table[(n >>  0) & 0xff]);
 145 | #elif SS_BLOCKSIZE < 256
 146 |   return lg_table[n];
 147 | #else
 148 |   return (n & 0xff00) ?
 149 |           8 + lg_table[(n >> 8) & 0xff] :
 150 |           0 + lg_table[(n >> 0) & 0xff];
 151 | #endif
 152 | }
 153 | 
 154 | #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
 155 | 
 156 | #if SS_BLOCKSIZE != 0
 157 | 
/* Lookup table used by ss_isqrt for its first estimate.  The entries look
   like 16*sqrt(index) rounded down (0, 16, 22, 27, 32, ...) - presumably
   generated that way; confirm against upstream libdivsufsort. */
static const int sqq_table[256] = {
  0,  16,  22,  27,  32,  35,  39,  42,  45,  48,  50,  53,  55,  57,  59,  61,
 64,  65,  67,  69,  71,  73,  75,  76,  78,  80,  81,  83,  84,  86,  87,  89,
 90,  91,  93,  94,  96,  97,  98,  99, 101, 102, 103, 104, 106, 107, 108, 109,
110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
};
 176 | /* Integer square root, capped at SS_BLOCKSIZE.
     |  *
     |  * Returns SS_BLOCKSIZE for every x >= SS_BLOCKSIZE * SS_BLOCKSIZE.
     |  * Otherwise e is derived bytewise from lg_table (defined outside this
     |  * view; presumably floor(log2) of a byte, which would make e the index of
     |  * the highest set bit of x -- confirm).  A first estimate is read from
     |  * sqq_table and, for e >= 16, refined with one or two Newton steps
     |  * y = (y + 1 + x / y) >> 1.  The last line lowers y by one when y * y
     |  * exceeds x.  sssort passes a range length; negative x is not treated
     |  * specially.
     |  */
 177 | static INLINE
 178 | int
 179 | ss_isqrt(int x) {
 180 |   int y, e;
 181 | 
 182 |   if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; } /* cap the result */
 183 |   e = (x & 0xffff0000) ? /* select the highest non-zero byte of x */
 184 |         ((x & 0xff000000) ?
 185 |           24 + lg_table[(x >> 24) & 0xff] :
 186 |           16 + lg_table[(x >> 16) & 0xff]) :
 187 |         ((x & 0x0000ff00) ?
 188 |            8 + lg_table[(x >>  8) & 0xff] :
 189 |            0 + lg_table[(x >>  0) & 0xff]);
 190 | 
 191 |   if(e >= 16) {
 192 |     y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7); /* table seed, scaled up */
 193 |     if(e >= 24) { y = (y + 1 + x / y) >> 1; } /* extra Newton step for the widest inputs */
 194 |     y = (y + 1 + x / y) >> 1; /* Newton step */
 195 |   } else if(e >= 8) {
 196 |     y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1; /* table seed, scaled down, plus one */
 197 |   } else {
 198 |     return sqq_table[x] >> 4; /* e < 8: x indexes the table directly */
 199 |   }
 200 | 
 201 |   return (x < (y * y)) ? y - 1 : y; /* step down if the estimate overshoots */
 202 | }
 203 | 
 204 | #endif /* SS_BLOCKSIZE != 0 */
 205 | 
 206 | 
 207 | /*---------------------------------------------------------------------------*/
 208 | 
 209 | /* Three-way comparison of two substrings of T.
     |  *
     |  * The substring described by p starts at T + depth + p[0] and stops just
     |  * before T + p[1] + 2, so p must point at two consecutive readable entries.
     |  * Returns a negative value, zero or a positive value when the first
     |  * substring sorts before, equal to or after the second.  A substring that
     |  * runs out first while all compared bytes match sorts first.
     |  */
 210 | static INLINE
 211 | int
 212 | ss_compare(const unsigned char *T,
 213 |            const int *p1, const int *p2,
 214 |            int depth) {
 215 |   const unsigned char *lhs = T + depth + *p1;
 216 |   const unsigned char *rhs = T + depth + *p2;
 217 |   const unsigned char *lhs_end = T + *(p1 + 1) + 2;
 218 |   const unsigned char *rhs_end = T + *(p2 + 1) + 2;
 219 | 
 220 |   while((lhs < lhs_end) && (rhs < rhs_end) && (*lhs == *rhs)) {
 221 |     ++lhs;
 222 |     ++rhs;
 223 |   }
 224 | 
 225 |   if(lhs_end <= lhs) { return (rhs < rhs_end) ? -1 : 0; }
 226 |   if(rhs_end <= rhs) { return 1; }
 227 |   return *lhs - *rhs;
 228 | }
 229 | 
 230 | 
 231 | /*---------------------------------------------------------------------------*/
 232 | 
 233 | #if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
 234 | 
 235 | /* Insertionsort for small size groups.
     |  *
     |  * Sorts the entries of [first, last) by ss_compare() on the substrings
     |  * they index in PA, starting at character offset depth.  The range is
     |  * processed right to left: each *i is inserted into the already sorted
     |  * tail (i, last).  An entry that compares equal to the element it is
     |  * inserted in front of causes that element to be stored bit-complemented
     |  * (negative), so a run of negative entries marks "equal to the entry on
     |  * its left"; such runs are shifted as a unit.
     |  */
 236 | static
 237 | void
 238 | ss_insertionsort(const unsigned char *T, const int *PA,
 239 |                  int *first, int *last, int depth) {
 240 |   int *i, *j;
 241 |   int t;
 242 |   int r;
 243 | 
 244 |   for(i = last - 2; first <= i; --i) {
 245 |     for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
 246 |       do { *(j - 1) = *j; } while((++j < last) && (*j < 0)); /* move *j and its negative followers left */
 247 |       if(last <= j) { break; }
 248 |     }
 249 |     if(r == 0) { *j = ~*j; } /* tag *j as equal to the inserted element */
 250 |     *(j - 1) = t;
 251 |   }
 252 | }
 253 | 
 254 | #endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
 255 | 
 256 | 
 257 | /*---------------------------------------------------------------------------*/
 258 | 
 259 | #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
 260 | /* Sift-down step of the heapsort below.
     |  *
     |  * Treats SA[0..size) as a binary heap keyed by Td[PA[SA[k]]] and moves
     |  * the element at index i down, promoting the larger child at each level,
     |  * until neither child has a larger key.
     |  * NOTE: the right child SA[2 * i + 2] is read without comparing its index
     |  * against size.  ss_heapsort passes an odd size while building the heap,
     |  * and in its extraction loop SA[size] is still an element of the array.
     |  */
 261 | static INLINE
 262 | void
 263 | ss_fixdown(const unsigned char *Td, const int *PA,
 264 |            int *SA, int i, int size) {
 265 |   int j, k;
 266 |   int v;
 267 |   int c, d, e;
 268 | 
 269 |   for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
 270 |     d = Td[PA[SA[k = j++]]]; /* left child; j now names the right child */
 271 |     if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; } /* prefer the larger child */
 272 |     if(d <= c) { break; }
 273 |   }
 274 |   SA[i] = v;
 275 | }
 276 | 
 277 | /* Simple top-down heapsort.
     |  *
     |  * Sorts SA[0..size) into ascending order of the key Td[PA[SA[k]]].
     |  * For an even size the last element is set aside first so that the heap
     |  * is built over an odd count m; it is swapped back in before the
     |  * extraction loop.  SWAP is a macro defined outside this view
     |  * (presumably it uses the local t as scratch -- confirm).
     |  */
 278 | static
 279 | void
 280 | ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) {
 281 |   int i, m;
 282 |   int t;
 283 | 
 284 |   m = size;
 285 |   if((size % 2) == 0) {
 286 |     m--;
 287 |     if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
 288 |   }
 289 | 
 290 |   for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); } /* build the heap */
 291 |   if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
 292 |   for(i = m - 1; 0 < i; --i) { /* repeatedly move the maximum to the end */
 293 |     t = SA[0], SA[0] = SA[i];
 294 |     ss_fixdown(Td, PA, SA, 0, i);
 295 |     SA[i] = t;
 296 |   }
 297 | }
 298 | 
 299 | 
 300 | /*---------------------------------------------------------------------------*/
 301 | 
 302 | /* Returns the median of three elements.
     |  *
     |  * v1, v2 and v3 point at entries whose sort key is Td[PA[*v]]; the
     |  * pointer whose key is the middle one is returned.  Nothing is written
     |  * through the pointers.
     |  */
 303 | static INLINE
 304 | int *
 305 | ss_median3(const unsigned char *Td, const int *PA,
 306 |            int *v1, int *v2, int *v3) {
 307 |   int *lo, *hi;
 308 |   if(Td[PA[*v1]] > Td[PA[*v2]]) { lo = v2, hi = v1; }
 309 |   else                          { lo = v1, hi = v2; }
 310 |   if(Td[PA[*hi]] <= Td[PA[*v3]]) { return hi; }
 311 |   /* v3 ranks below hi, so the median is the larger of lo and v3. */
 312 |   if(Td[PA[*lo]] > Td[PA[*v3]]) { return lo; }
 313 |   return v3;
 314 | }
 315 | 
 316 | /* Returns the median of five elements.
     |  *
     |  * Each argument points at an entry whose sort key is Td[PA[*v]].  Only
     |  * the local pointer variables are exchanged (SWAP is a macro defined
     |  * outside this view; presumably it uses the local t as scratch), so the
     |  * array itself is not modified; the returned pointer is one of the five
     |  * arguments.
     |  */
 317 | static INLINE
 318 | int *
 319 | ss_median5(const unsigned char *Td, const int *PA,
 320 |            int *v1, int *v2, int *v3, int *v4, int *v5) {
 321 |   int *t;
 322 |   if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
 323 |   if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
 324 |   if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
 325 |   if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
 326 |   if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
 327 |   if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
 328 |   return v3;
 329 | }
 330 | 
 331 | /* Returns the pivot element.
     |  *
     |  * Picks a pivot for [first, last) by sampling keys Td[PA[*p]]: a median
     |  * of three for up to 32 elements, a median of five for up to 512, and a
     |  * median of three medians-of-three for anything longer.  The returned
     |  * pointer lies inside the range.
     |  */
 332 | static INLINE
 333 | int *
 334 | ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) {
 335 |   int len = last - first;
 336 |   int *mid = first + len / 2;
 337 |   int *tail = last - 1;
 338 |   int gap;
 339 |   int *lo, *md, *hi;
 340 | 
 341 |   if(len <= 32) {
 342 |     return ss_median3(Td, PA, first, mid, tail);
 343 |   }
 344 |   if(len <= 512) {
 345 |     gap = len >> 2;
 346 |     return ss_median5(Td, PA, first, first + gap, mid, tail - gap, tail);
 347 |   }
 348 | 
 349 |   gap = len >> 3;
 350 |   lo = ss_median3(Td, PA, first, first + gap, first + (gap << 1));
 351 |   md = ss_median3(Td, PA, mid - gap, mid, mid + gap);
 352 |   hi = ss_median3(Td, PA, tail - (gap << 1), tail - gap, tail);
 353 |   return ss_median3(Td, PA, lo, md, hi);
 354 | }
 355 | 
 356 | 
 357 | /*---------------------------------------------------------------------------*/
 358 | 
 359 | /* Binary partition for substrings.
     |  *
     |  * Splits [first, last) in two.  Entries x with
     |  * PA[x] + depth >= PA[x + 1] + 1 are gathered at the front and stored
     |  * bit-complemented; all other entries follow unchanged.  (Presumably the
     |  * test identifies substrings with no further character to compare at
     |  * this depth -- confirm against ss_compare's end pointer.)  If the front
     |  * group is non-empty its first entry is complemented back.  Returns a
     |  * pointer to the first entry of the second group.
     |  */
 360 | static INLINE
 361 | int *
 362 | ss_partition(const int *PA,
 363 |                     int *first, int *last, int depth) {
 364 |   int *a, *b;
 365 |   int t;
 366 |   for(a = first - 1, b = last;;) {
 367 |     for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; } /* already in the front group: tag it */
 368 |     for(; (a < --b) && ((PA[*b] + depth) <  (PA[*b + 1] + 1));) { }
 369 |     if(b <= a) { break; }
 370 |     t = ~*b; /* *b belongs in front: exchange with *a and tag it */
 371 |     *b = *a;
 372 |     *a = t;
 373 |   }
 374 |   if(first < a) { *first = ~*first; } /* the head of the front group stays untagged */
 375 |   return a;
 376 | }
 377 | 
 378 | /* Multikey introsort for medium size groups.
     |  *
     |  * Sorts the entries of [first, last) by the substrings they index in PA,
     |  * comparing one character at a time: Td = T + depth is the character
     |  * column currently being examined.  Pending sub-ranges are kept on an
     |  * explicit stack via STACK_PUSH / STACK_POP (macros defined outside this
     |  * view; since the outer loop has no other exit, STACK_POP presumably
     |  * returns from the function when the stack is empty -- confirm).
     |  *
     |  * limit is the remaining partitioning budget for the current range,
     |  * seeded from ss_ilg() (defined outside this view).  When it reaches
     |  * zero the range is heapsorted on the current column; a negative limit
     |  * marks a range that is already ordered on the current column.
     |  * Ranges of at most SS_INSERTIONSORT_THRESHOLD entries are finished by
     |  * ss_insertionsort().
     |  */
 379 | static
 380 | void
 381 | ss_mintrosort(const unsigned char *T, const int *PA,
 382 |               int *first, int *last,
 383 |               int depth) {
 384 | #define STACK_SIZE SS_MISORT_STACKSIZE
 385 |   struct { int *a, *b, c; int d; } stack[STACK_SIZE]; /* pending (first, last, depth, limit) entries */
 386 |   const unsigned char *Td;
 387 |   int *a, *b, *c, *d, *e, *f;
 388 |   int s, t;
 389 |   int ssize;
 390 |   int limit;
 391 |   int v, x = 0;
 392 | 
 393 |   for(ssize = 0, limit = ss_ilg(last - first);;) {
 394 | 
 395 |     if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
 396 | #if 1 < SS_INSERTIONSORT_THRESHOLD
 397 |       if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
 398 | #endif
 399 |       STACK_POP(first, last, depth, limit);
 400 |       continue;
 401 |     }
 402 | 
 403 |     Td = T + depth;
 404 |     if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); } /* budget used up: sort this column outright */
 405 |     if(limit < 0) { /* range is ordered on Td: peel off the leading run of equal keys */
 406 |       for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
 407 |         if((x = Td[PA[*a]]) != v) {
 408 |           if(1 < (a - first)) { break; }
 409 |           v = x;
 410 |           first = a; /* a run of one needs no further work */
 411 |         }
 412 |       }
 413 |       if(Td[PA[*first] - 1] < v) {
 414 |         first = ss_partition(PA, first, a, depth);
 415 |       }
 416 |       if((a - first) <= (last - a)) { /* continue with the smaller part, push the larger */
 417 |         if(1 < (a - first)) {
 418 |           STACK_PUSH(a, last, depth, -1);
 419 |           last = a, depth += 1, limit = ss_ilg(a - first);
 420 |         } else {
 421 |           first = a, limit = -1;
 422 |         }
 423 |       } else {
 424 |         if(1 < (last - a)) {
 425 |           STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
 426 |           first = a, limit = -1;
 427 |         } else {
 428 |           last = a, depth += 1, limit = ss_ilg(a - first);
 429 |         }
 430 |       }
 431 |       continue;
 432 |     }
 433 | 
 434 |     /* choose pivot */
 435 |     a = ss_pivot(Td, PA, first, last);
 436 |     v = Td[PA[*a]];
 437 |     SWAP(*first, *a);
 438 | 
 439 |     /* partition: keys equal to v are parked at both ends of the range */
 440 |     for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
 441 |     if(((a = b) < last) && (x < v)) {
 442 |       for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
 443 |         if(x == v) { SWAP(*b, *a); ++a; }
 444 |       }
 445 |     }
 446 |     for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
 447 |     if((b < (d = c)) && (x > v)) {
 448 |       for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
 449 |         if(x == v) { SWAP(*c, *d); --d; }
 450 |       }
 451 |     }
 452 |     for(; b < c;) {
 453 |       SWAP(*b, *c);
 454 |       for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
 455 |         if(x == v) { SWAP(*b, *a); ++a; }
 456 |       }
 457 |       for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
 458 |         if(x == v) { SWAP(*c, *d); --d; }
 459 |       }
 460 |     }
 461 | 
 462 |     if(a <= d) { /* at least one key differs from v */
 463 |       c = b - 1;
 464 | 
 465 |       if((s = a - first) > (t = b - a)) { s = t; }
 466 |       for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } /* bring the left equal block to the middle */
 467 |       if((s = d - c) > (t = last - d - 1)) { s = t; }
 468 |       for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } /* bring the right equal block to the middle */
 469 | 
 470 |       a = first + (b - a), c = last - (d - c); /* [first,a) < v, [a,c) == v, [c,last) > v */
 471 |       b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
 472 | 
 473 |       if((a - first) <= (last - c)) { /* order the three parts by size; the loop continues with the smallest */
 474 |         if((last - c) <= (c - b)) {
 475 |           STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
 476 |           STACK_PUSH(c, last, depth, limit);
 477 |           last = a;
 478 |         } else if((a - first) <= (c - b)) {
 479 |           STACK_PUSH(c, last, depth, limit);
 480 |           STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
 481 |           last = a;
 482 |         } else {
 483 |           STACK_PUSH(c, last, depth, limit);
 484 |           STACK_PUSH(first, a, depth, limit);
 485 |           first = b, last = c, depth += 1, limit = ss_ilg(c - b);
 486 |         }
 487 |       } else {
 488 |         if((a - first) <= (c - b)) {
 489 |           STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
 490 |           STACK_PUSH(first, a, depth, limit);
 491 |           first = c;
 492 |         } else if((last - c) <= (c - b)) {
 493 |           STACK_PUSH(first, a, depth, limit);
 494 |           STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
 495 |           first = c;
 496 |         } else {
 497 |           STACK_PUSH(first, a, depth, limit);
 498 |           STACK_PUSH(c, last, depth, limit);
 499 |           first = b, last = c, depth += 1, limit = ss_ilg(c - b);
 500 |         }
 501 |       }
 502 |     } else { /* presumably every key equals v: move on to the next column */
 503 |       limit += 1; /* undo the decrement above; no partitioning happened */
 504 |       if(Td[PA[*first] - 1] < v) {
 505 |         first = ss_partition(PA, first, last, depth);
 506 |         limit = ss_ilg(last - first);
 507 |       }
 508 |       depth += 1;
 509 |     }
 510 |   }
 511 | #undef STACK_SIZE
 512 | }
 513 | 
 514 | #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
 515 | 
 516 | 
 517 | /*---------------------------------------------------------------------------*/
 518 | 
 519 | #if SS_BLOCKSIZE != 0
 520 | /* Exchanges the n ints starting at a with the n ints starting at b,
     |  * element by element from the lowest index up.  Does nothing when
     |  * n <= 0.
     |  */
 521 | static INLINE
 522 | void
 523 | ss_blockswap(int *a, int *b, int n) {
 524 |   int i, tmp;
 525 |   for(i = 0; i < n; ++i) {
 526 |     tmp = a[i]; a[i] = b[i]; b[i] = tmp;
 527 |   }
 528 | }
 529 | /* Exchanges the adjacent ranges [first, middle) and [middle, last) in
     |  * place, i.e. rotates [first, last) so that the element at middle comes
     |  * first.  l and r track the lengths still to be moved on each side;
     |  * equal lengths are finished with one ss_blockswap(), otherwise the
     |  * shorter side is cycled through the longer one and the loop continues
     |  * on what remains.  The comma-separated assignments are order-dependent.
     |  */
 530 | static INLINE
 531 | void
 532 | ss_rotate(int *first, int *middle, int *last) {
 533 |   int *a, *b, t;
 534 |   int l, r;
 535 |   l = middle - first, r = last - middle;
 536 |   for(; (0 < l) && (0 < r);) {
 537 |     if(l == r) { ss_blockswap(first, middle, l); break; }
 538 |     if(l < r) { /* left side is shorter: work backwards from the right end */
 539 |       a = last - 1, b = middle - 1;
 540 |       t = *a;
 541 |       do {
 542 |         *a-- = *b, *b-- = *a;
 543 |         if(b < first) {
 544 |           *a = t;
 545 |           last = a;
 546 |           if((r -= l + 1) <= l) { break; }
 547 |           a -= 1, b = middle - 1;
 548 |           t = *a;
 549 |         }
 550 |       } while(1);
 551 |     } else { /* right side is shorter: work forwards from the left end */
 552 |       a = first, b = middle;
 553 |       t = *a;
 554 |       do {
 555 |         *a++ = *b, *b++ = *a;
 556 |         if(last <= b) {
 557 |           *a = t;
 558 |           first = a + 1;
 559 |           if((l -= r + 1) <= r) { break; }
 560 |           a += 1, b = middle;
 561 |           t = *a;
 562 |         }
 563 |       } while(1);
 564 |     }
 565 |   }
 566 | }
 567 | 
 568 | 
 569 | /*---------------------------------------------------------------------------*/
 570 | /* Merges the sorted runs [first, middle) and [middle, last) without a
     |  * scratch buffer.
     |  *
     |  * Each pass takes the last entry of the right run, binary-searches the
     |  * left run with ss_compare() for its insertion point a, and rotates
     |  * [a, middle) behind the right run.  Entries may be stored
     |  * bit-complemented (negative); such an entry is un-complemented before
     |  * it is used as an index into PA.  When the search finds an equal
     |  * element (r == 0) the entry at a is complemented.  x records whether
     |  * the current right-hand entry was negative, in which case the preceding
     |  * negative entries are skipped as well.
     |  */
 571 | static
 572 | void
 573 | ss_inplacemerge(const unsigned char *T, const int *PA,
 574 |                 int *first, int *middle, int *last,
 575 |                 int depth) {
 576 |   const int *p;
 577 |   int *a, *b;
 578 |   int len, half;
 579 |   int q, r;
 580 |   int x;
 581 | 
 582 |   for(;;) {
 583 |     if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
 584 |     else                { x = 0; p = PA +  *(last - 1); }
 585 |     for(a = first, len = middle - first, half = len >> 1, r = -1; /* binary search in the left run */
 586 |         0 < len;
 587 |         len = half, half >>= 1) {
 588 |       b = a + half;
 589 |       q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
 590 |       if(q < 0) {
 591 |         a = b + 1;
 592 |         half -= (len & 1) ^ 1;
 593 |       } else {
 594 |         r = q; /* remember the result at the last position not below p */
 595 |       }
 596 |     }
 597 |     if(a < middle) {
 598 |       if(r == 0) { *a = ~*a; }
 599 |       ss_rotate(a, middle, last);
 600 |       last -= middle - a;
 601 |       middle = a;
 602 |       if(first == middle) { break; }
 603 |     }
 604 |     --last;
 605 |     if(x != 0) { while(*--last < 0) { } }
 606 |     if(middle == last) { break; }
 607 |   }
 608 | }
 609 | 
 610 | 
 611 | /*---------------------------------------------------------------------------*/
 612 | 
 613 | /* Merge-forward with internal buffer.
     |  *
     |  * Merges the sorted runs [first, middle) and [middle, last) front to
     |  * back.  The left run is first exchanged with buf by ss_blockswap(), and
     |  * every store into the output is paired with a store of the displaced
     |  * value back into the slot just consumed (t holds the first displaced
     |  * value), so whatever buf held on entry is put back by the time the
     |  * function returns.  buf must have room for middle - first entries.
     |  * Negative (bit-complemented) entries are copied along with the
     |  * non-negative entry that precedes them; on a tie the right-hand entry
     |  * is complemented.
     |  */
 614 | static
 615 | void
 616 | ss_mergeforward(const unsigned char *T, const int *PA,
 617 |                 int *first, int *middle, int *last,
 618 |                 int *buf, int depth) {
 619 |   int *a, *b, *c, *bufend;
 620 |   int t;
 621 |   int r;
 622 | 
 623 |   bufend = buf + (middle - first) - 1; /* last slot of the buffered left run */
 624 |   ss_blockswap(buf, first, middle - first);
 625 | 
 626 |   for(t = *(a = first), b = buf, c = middle;;) { /* a: output, b: buffered left run, c: right run */
 627 |     r = ss_compare(T, PA + *b, PA + *c, depth);
 628 |     if(r < 0) {
 629 |       do {
 630 |         *a++ = *b;
 631 |         if(bufend <= b) { *bufend = t; return; } /* left run exhausted */
 632 |         *b++ = *a;
 633 |       } while(*b < 0);
 634 |     } else if(r > 0) {
 635 |       do {
 636 |         *a++ = *c, *c++ = *a;
 637 |         if(last <= c) { /* right run exhausted: flush the rest of the buffer */
 638 |           while(b < bufend) { *a++ = *b, *b++ = *a; }
 639 |           *a = *b, *b = t;
 640 |           return;
 641 |         }
 642 |       } while(*c < 0);
 643 |     } else {
 644 |       *c = ~*c; /* tie: tag the right-hand entry, then emit both */
 645 |       do {
 646 |         *a++ = *b;
 647 |         if(bufend <= b) { *bufend = t; return; }
 648 |         *b++ = *a;
 649 |       } while(*b < 0);
 650 | 
 651 |       do {
 652 |         *a++ = *c, *c++ = *a;
 653 |         if(last <= c) {
 654 |           while(b < bufend) { *a++ = *b, *b++ = *a; }
 655 |           *a = *b, *b = t;
 656 |           return;
 657 |         }
 658 |       } while(*c < 0);
 659 |     }
 660 |   }
 661 | }
 662 | 
 663 | /* Merge-backward with internal buffer.
     |  *
     |  * Mirror image of the forward merge: the right run [middle, last) is
     |  * exchanged with buf by ss_blockswap() and the two runs are merged from
     |  * the back of [first, last) towards the front.  buf must have room for
     |  * last - middle entries; displaced values are written back as slots are
     |  * consumed (t holds the first one).
     |  * p1 / p2 point into PA for the current buffered and left-run entries.
     |  * Bit 1 of x is set while p1 came from a complemented entry and bit 2
     |  * while p2 did; in that case the whole run of negative entries is copied
     |  * before the flag is cleared.  On a tie the buffered entry is written
     |  * complemented.
     |  */
 664 | static
 665 | void
 666 | ss_mergebackward(const unsigned char *T, const int *PA,
 667 |                  int *first, int *middle, int *last,
 668 |                  int *buf, int depth) {
 669 |   const int *p1, *p2;
 670 |   int *a, *b, *c, *bufend;
 671 |   int t;
 672 |   int r;
 673 |   int x;
 674 | 
 675 |   bufend = buf + (last - middle) - 1; /* last slot of the buffered right run */
 676 |   ss_blockswap(buf, middle, last - middle);
 677 | 
 678 |   x = 0;
 679 |   if(*bufend < 0)       { p1 = PA + ~*bufend; x |= 1; }
 680 |   else                  { p1 = PA +  *bufend; }
 681 |   if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
 682 |   else                  { p2 = PA +  *(middle - 1); }
 683 |   for(t = *(a = last - 1), b = bufend, c = middle - 1;;) { /* a: output, b: buffered right run, c: left run */
 684 |     r = ss_compare(T, p1, p2, depth);
 685 |     if(0 < r) { /* buffered entry is larger: emit it */
 686 |       if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
 687 |       *a-- = *b;
 688 |       if(b <= buf) { *buf = t; break; } /* buffer exhausted */
 689 |       *b-- = *a;
 690 |       if(*b < 0) { p1 = PA + ~*b; x |= 1; }
 691 |       else       { p1 = PA +  *b; }
 692 |     } else if(r < 0) { /* left-run entry is larger: emit it */
 693 |       if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
 694 |       *a-- = *c, *c-- = *a;
 695 |       if(c < first) { /* left run exhausted: flush the rest of the buffer */
 696 |         while(buf < b) { *a-- = *b, *b-- = *a; }
 697 |         *a = *b, *b = t;
 698 |         break;
 699 |       }
 700 |       if(*c < 0) { p2 = PA + ~*c; x |= 2; }
 701 |       else       { p2 = PA +  *c; }
 702 |     } else { /* tie: emit the buffered entry tagged, then the left-run entry */
 703 |       if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
 704 |       *a-- = ~*b;
 705 |       if(b <= buf) { *buf = t; break; }
 706 |       *b-- = *a;
 707 |       if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
 708 |       *a-- = *c, *c-- = *a;
 709 |       if(c < first) {
 710 |         while(buf < b) { *a-- = *b, *b-- = *a; }
 711 |         *a = *b, *b = t;
 712 |         break;
 713 |       }
 714 |       if(*b < 0) { p1 = PA + ~*b; x |= 1; }
 715 |       else       { p1 = PA +  *b; }
 716 |       if(*c < 0) { p2 = PA + ~*c; x |= 2; }
 717 |       else       { p2 = PA +  *c; }
 718 |     }
 719 |   }
 720 | }
 721 | 
 722 | /* D&C based merge.
     |  *
     |  * Merges the sorted runs [first, middle) and [middle, last) using a
     |  * scratch buffer of bufsize entries.  If either run fits into the buffer
     |  * it is merged directly (backward for the right run, forward for the
     |  * left run).  Otherwise a binary search finds the largest m such that
     |  * the first m entries of the right run sort before the last m entries of
     |  * the left run, those two blocks are exchanged, and the two resulting
     |  * sub-problems are handled one after the other via an explicit stack
     |  * (STACK_PUSH / STACK_POP are macros defined outside this view;
     |  * STACK_POP presumably returns when the stack is empty -- confirm).
     |  *
     |  * check / next are bit sets consumed by MERGE_CHECK once a sub-merge is
     |  * done: bit 1 complements *first unconditionally, bit 2 complements
     |  * *first if it compares equal to its predecessor, bit 4 does the same
     |  * for *last.  GETIDX strips the complement tag from an entry.
     |  */
 723 | static
 724 | void
 725 | ss_swapmerge(const unsigned char *T, const int *PA,
 726 |              int *first, int *middle, int *last,
 727 |              int *buf, int bufsize, int depth) {
 728 | #define STACK_SIZE SS_SMERGE_STACKSIZE
 729 | #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
 730 | #define MERGE_CHECK(a, b, c)\
 731 |   do {\
 732 |     if(((c) & 1) ||\
 733 |        (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
 734 |       *(a) = ~*(a);\
 735 |     }\
 736 |     if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
 737 |       *(b) = ~*(b);\
 738 |     }\
 739 |   } while(0)
 740 |   struct { int *a, *b, *c; int d; } stack[STACK_SIZE]; /* pending (first, middle, last, check) entries */
 741 |   int *l, *r, *lm, *rm;
 742 |   int m, len, half;
 743 |   int ssize;
 744 |   int check, next;
 745 | 
 746 |   for(check = 0, ssize = 0;;) {
 747 |     if((last - middle) <= bufsize) { /* right run fits into the buffer */
 748 |       if((first < middle) && (middle < last)) {
 749 |         ss_mergebackward(T, PA, first, middle, last, buf, depth);
 750 |       }
 751 |       MERGE_CHECK(first, last, check);
 752 |       STACK_POP(first, middle, last, check);
 753 |       continue;
 754 |     }
 755 | 
 756 |     if((middle - first) <= bufsize) { /* left run fits into the buffer */
 757 |       if(first < middle) {
 758 |         ss_mergeforward(T, PA, first, middle, last, buf, depth);
 759 |       }
 760 |       MERGE_CHECK(first, last, check);
 761 |       STACK_POP(first, middle, last, check);
 762 |       continue;
 763 |     }
 764 | 
 765 |     for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1; /* binary search for the block size m */
 766 |         0 < len;
 767 |         len = half, half >>= 1) {
 768 |       if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
 769 |                        PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
 770 |         m += half + 1;
 771 |         half -= (len & 1) ^ 1;
 772 |       }
 773 |     }
 774 | 
 775 |     if(0 < m) {
 776 |       lm = middle - m, rm = middle + m;
 777 |       ss_blockswap(lm, middle, m); /* exchange the m entries on each side of middle */
 778 |       l = r = middle, next = 0;
 779 |       if(rm < last) {
 780 |         if(*rm < 0) {
 781 |           *rm = ~*rm;
 782 |           if(first < lm) { for(; *--l < 0;) { } next |= 4; }
 783 |           next |= 1;
 784 |         } else if(first < lm) {
 785 |           for(; *r < 0; ++r) { }
 786 |           next |= 2;
 787 |         }
 788 |       }
 789 | 
 790 |       if((l - first) <= (last - r)) { /* continue with the smaller half, push the larger */
 791 |         STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
 792 |         middle = lm, last = l, check = (check & 3) | (next & 4);
 793 |       } else {
 794 |         if((next & 2) && (r == middle)) { next ^= 6; }
 795 |         STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
 796 |         first = r, middle = rm, check = (next & 3) | (check & 4);
 797 |       }
 798 |     } else { /* m == 0: the runs are already in order */
 799 |       if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
 800 |         *middle = ~*middle;
 801 |       }
 802 |       MERGE_CHECK(first, last, check);
 803 |       STACK_POP(first, middle, last, check);
 804 |     }
 805 |   }
 806 | #undef STACK_SIZE
 807 | }
 808 | 
 809 | #endif /* SS_BLOCKSIZE != 0 */
 810 | 
 811 | 
 812 | /*---------------------------------------------------------------------------*/
 813 | 
 814 | /* Substring sort.
     |  *
     |  * Sorts the entries of [first, last) by the substrings they index in PA,
     |  * starting at character offset depth.
     |  *
     |  * With SS_BLOCKSIZE == 0 the whole range goes to ss_mintrosort().
     |  * Otherwise the range is cut into blocks of SS_BLOCKSIZE entries; each
     |  * block is sorted on its own and merged with its predecessors by
     |  * ss_swapmerge(), pairing blocks according to the set bits of the block
     |  * counter i.  buf / bufsize describe scratch space supplied by the
     |  * caller.  If that buffer is smaller than SS_BLOCKSIZE, than the range
     |  * and than ss_isqrt(range length), the last `limit` entries of the range
     |  * itself are used as the buffer instead; that tail is then sorted
     |  * separately and joined with ss_inplacemerge().
     |  *
     |  * If lastsuffix is non-zero the entry at *first is left out of the sort
     |  * and inserted afterwards by a linear scan, compared through a temporary
     |  * two-entry table whose second entry is n - 2.
     |  */
 815 | static
 816 | void
 817 | sssort(const unsigned char *T, const int *PA,
 818 |        int *first, int *last,
 819 |        int *buf, int bufsize,
 820 |        int depth, int n, int lastsuffix) {
 821 |   int *a;
 822 | #if SS_BLOCKSIZE != 0
 823 |   int *b, *middle, *curbuf;
 824 |   int j, k, curbufsize, limit;
 825 | #endif
 826 |   int i;
 827 | 
 828 |   if(lastsuffix != 0) { ++first; } /* hold the first entry back; see the end of the function */
 829 | 
 830 | #if SS_BLOCKSIZE == 0
 831 |   ss_mintrosort(T, PA, first, last, depth);
 832 | #else
 833 |   if((bufsize < SS_BLOCKSIZE) &&
 834 |       (bufsize < (last - first)) &&
 835 |       (bufsize < (limit = ss_isqrt(last - first)))) {
 836 |     if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
 837 |     buf = middle = last - limit, bufsize = limit; /* borrow the tail of the range as buffer */
 838 |   } else {
 839 |     middle = last, limit = 0;
 840 |   }
 841 |   for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
 842 | #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
 843 |     ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
 844 | #elif 1 < SS_BLOCKSIZE
 845 |     ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
 846 | #endif
 847 |     curbufsize = last - (a + SS_BLOCKSIZE); /* the not yet sorted remainder can serve as scratch */
 848 |     curbuf = a + SS_BLOCKSIZE;
 849 |     if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
 850 |     for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) { /* merge equal-sized neighbours */
 851 |       ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
 852 |     }
 853 |   }
 854 | #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
 855 |   ss_mintrosort(T, PA, a, middle, depth);
 856 | #elif 1 < SS_BLOCKSIZE
 857 |   ss_insertionsort(T, PA, a, middle, depth);
 858 | #endif
 859 |   for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) { /* merge the remaining runs, smallest first */
 860 |     if(i & 1) {
 861 |       ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
 862 |       a -= k;
 863 |     }
 864 |   }
 865 |   if(limit != 0) { /* the borrowed tail still has to be sorted and merged in */
 866 | #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
 867 |     ss_mintrosort(T, PA, middle, last, depth);
 868 | #elif 1 < SS_BLOCKSIZE
 869 |     ss_insertionsort(T, PA, middle, last, depth);
 870 | #endif
 871 |     ss_inplacemerge(T, PA, first, middle, last, depth);
 872 |   }
 873 | #endif
 874 | 
 875 |   if(lastsuffix != 0) {
 876 |     /* Insert last type B* suffix. */
 877 |     int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
 878 |     for(a = first, i = *(first - 1);
 879 |         (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
 880 |         ++a) {
 881 |       *(a - 1) = *a; /* shift smaller entries (and tagged ones) one slot left */
 882 |     }
 883 |     *(a - 1) = i;
 884 |   }
 885 | }
 886 | 
 887 | 
 888 | /*---------------------------------------------------------------------------*/
 889 | /* Returns shift + lg_table[byte], where byte is the most significant
     |  * non-zero byte of n and shift is that byte's bit offset (0, 8, 16 or
     |  * 24).  For n == 0 the low byte is used.  lg_table is defined outside
     |  * this view; presumably it holds floor(log2) of a byte value, which would
     |  * make the result floor(log2(n)) -- confirm.
     |  */
 890 | static INLINE
 891 | int
 892 | tr_ilg(int n) {
 893 |   int shift;
 894 | 
 895 |   if(n & 0xff000000)      { shift = 24; }
 896 |   else if(n & 0x00ff0000) { shift = 16; }
 897 |   else if(n & 0x0000ff00) { shift =  8; }
 898 |   else                    { shift =  0; }
 899 |   return shift + lg_table[(n >> shift) & 0xff];
 900 | }
 901 | 
 902 | 
 903 | /*---------------------------------------------------------------------------*/
 904 | 
 905 | /* Simple insertionsort for small size groups.
     |  *
     |  * Sorts the entries of [first, last) into ascending order of the key
     |  * ISAd[entry], working left to right.  When the inserted element has the
     |  * same key as the element it ends up behind, that element is stored
     |  * bit-complemented (negative); runs of negative entries are shifted
     |  * together with the non-negative entry to their right.
     |  */
 906 | static
 907 | void
 908 | tr_insertionsort(const int *ISAd, int *first, int *last) {
 909 |   int *a, *b;
 910 |   int t, r;
 911 | 
 912 |   for(a = first + 1; a < last; ++a) {
 913 |     for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
 914 |       do { *(b + 1) = *b; } while((first <= --b) && (*b < 0)); /* move *b and its negative predecessors right */
 915 |       if(b < first) { break; }
 916 |     }
 917 |     if(r == 0) { *b = ~*b; } /* tag *b as having the same key as t */
 918 |     *(b + 1) = t;
 919 |   }
 920 | }
 921 | 
 922 | 
 923 | /*---------------------------------------------------------------------------*/
 924 | /* Sift-down step of the heapsort below.
     |  *
     |  * Treats SA[0..size) as a binary heap keyed by ISAd[SA[k]] and moves the
     |  * element at index i down, promoting the larger child at each level,
     |  * until neither child has a larger key.
     |  * NOTE: the right child SA[2 * i + 2] is read without comparing its index
     |  * against size.  tr_heapsort passes an odd size while building the heap,
     |  * and in its extraction loop SA[size] is still an element of the array.
     |  */
 925 | static INLINE
 926 | void
 927 | tr_fixdown(const int *ISAd, int *SA, int i, int size) {
 928 |   int j, k;
 929 |   int v;
 930 |   int c, d, e;
 931 | 
 932 |   for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
 933 |     d = ISAd[SA[k = j++]]; /* left child; j now names the right child */
 934 |     if(d < (e = ISAd[SA[j]])) { k = j; d = e; } /* prefer the larger child */
 935 |     if(d <= c) { break; }
 936 |   }
 937 |   SA[i] = v;
 938 | }
 939 | 
 940 | /* Simple top-down heapsort.
     |  *
     |  * Sorts SA[0..size) into ascending order of the key ISAd[SA[k]].
     |  * For an even size the last element is set aside first so that the heap
     |  * is built over an odd count m; it is swapped back in before the
     |  * extraction loop.  SWAP is a macro defined outside this view
     |  * (presumably it uses the local t as scratch -- confirm).
     |  */
 941 | static
 942 | void
 943 | tr_heapsort(const int *ISAd, int *SA, int size) {
 944 |   int i, m;
 945 |   int t;
 946 | 
 947 |   m = size;
 948 |   if((size % 2) == 0) {
 949 |     m--;
 950 |     if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
 951 |   }
 952 | 
 953 |   for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); } /* build the heap */
 954 |   if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
 955 |   for(i = m - 1; 0 < i; --i) { /* repeatedly move the maximum to the end */
 956 |     t = SA[0], SA[0] = SA[i];
 957 |     tr_fixdown(ISAd, SA, 0, i);
 958 |     SA[i] = t;
 959 |   }
 960 | }
 961 | 
 962 | 
 963 | /*---------------------------------------------------------------------------*/
 964 | 
 965 | /* Returns the median of three elements.
     |  *
     |  * v1, v2 and v3 point at entries whose sort key is ISAd[*v]; the pointer
     |  * whose key is the middle one is returned.  Nothing is written through
     |  * the pointers.
     |  */
 966 | static INLINE
 967 | int *
 968 | tr_median3(const int *ISAd, int *v1, int *v2, int *v3) {
 969 |   int *lo, *hi;
 970 |   if(ISAd[*v1] > ISAd[*v2]) { lo = v2, hi = v1; }
 971 |   else                      { lo = v1, hi = v2; }
 972 |   if(ISAd[*hi] <= ISAd[*v3]) { return hi; }
 973 |   /* v3 ranks below hi, so the median is the larger of lo and v3. */
 974 |   if(ISAd[*lo] > ISAd[*v3]) { return lo; }
 975 |   return v3;
 976 | }
 977 | 
 978 | /* Returns the median of five elements.
     |  *
     |  * Each argument points at an entry whose sort key is ISAd[*v].  Only the
     |  * local pointer variables are exchanged (SWAP is a macro defined outside
     |  * this view; presumably it uses the local t as scratch), so the array
     |  * itself is not modified; the returned pointer is one of the five
     |  * arguments.
     |  */
 979 | static INLINE
 980 | int *
 981 | tr_median5(const int *ISAd,
 982 |            int *v1, int *v2, int *v3, int *v4, int *v5) {
 983 |   int *t;
 984 |   if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
 985 |   if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
 986 |   if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
 987 |   if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
 988 |   if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
 989 |   if(ISAd[*v3] > ISAd[*v4]) { return v4; }
 990 |   return v3;
 991 | }
 992 | 
 993 | /* Returns the pivot element.
     |  *
     |  * Picks a pivot for [first, last) by sampling keys ISAd[*p]: a median of
     |  * three for up to 32 elements, a median of five for up to 512, and a
     |  * median of three medians-of-three for anything longer.  The returned
     |  * pointer lies inside the range.
     |  */
 994 | static INLINE
 995 | int *
 996 | tr_pivot(const int *ISAd, int *first, int *last) {
 997 |   int len = last - first;
 998 |   int *mid = first + len / 2;
 999 |   int *tail = last - 1;
1000 |   int gap;
1001 |   int *lo, *md, *hi;
1002 | 
1003 |   if(len <= 32) {
1004 |     return tr_median3(ISAd, first, mid, tail);
1005 |   }
1006 |   if(len <= 512) {
1007 |     gap = len >> 2;
1008 |     return tr_median5(ISAd, first, first + gap, mid, tail - gap, tail);
1009 |   }
1010 | 
1011 |   gap = len >> 3;
1012 |   lo = tr_median3(ISAd, first, first + gap, first + (gap << 1));
1013 |   md = tr_median3(ISAd, mid - gap, mid, mid + gap);
1014 |   hi = tr_median3(ISAd, tail - (gap << 1), tail - gap, tail);
1015 |   return tr_median3(ISAd, lo, md, hi);
1016 | }
1017 | 
1018 | 
1019 | /*---------------------------------------------------------------------------*/
1020 | /* Work budget consulted through trbudget_check().
     |  *   chance - number of refills still available
     |  *   remain - allowance left before the next refill
     |  *   incval - amount added to remain on each refill
     |  *   count  - running total of the sizes that were refused
     |  */
1021 | typedef struct _trbudget_t trbudget_t;
1022 | struct _trbudget_t {
1023 |   int chance;
1024 |   int remain;
1025 |   int incval;
1026 |   int count;
1027 | };
1028 | /* Initializes a budget: `chance` refills of `incval` each, with the first
     |  * allowance of `incval` already granted.  count is cleared as well,
     |  * because trbudget_check() accumulates into it with `+=` and would
     |  * otherwise read an indeterminate value if a caller forgot to reset it.
     |  * Callers that assign count themselves after this call are unaffected.
     |  */
1029 | static INLINE
1030 | void
1031 | trbudget_init(trbudget_t *budget, int chance, int incval) {
1032 |   budget->chance = chance;
1033 |   budget->remain = budget->incval = incval;
     |   budget->count = 0; /* no request has been refused yet */
1034 | }
1035 | /* Asks the budget for `size` units of work.
     |  *
     |  * Returns 1 and charges the allowance when size fits into remain.  If it
     |  * does not fit but a refill is left, one refill is consumed, incval is
     |  * added to the allowance, size is charged and 1 is returned.  With no
     |  * refill left, size is added to count and 0 is returned.
     |  */
1036 | static INLINE
1037 | int
1038 | trbudget_check(trbudget_t *budget, int size) {
1039 |   int granted = 1;
1040 |   if(size <= budget->remain)   { budget->remain -= size; }
1041 |   else if(budget->chance == 0) { budget->count += size; granted = 0; }
1042 |   else { budget->chance -= 1; budget->remain += budget->incval - size; }
1043 |   return granted;
1044 | }
1045 | 
1046 | 
1047 | /*---------------------------------------------------------------------------*/
1048 | /* Three-way partition of [first, last) around the key value v.
     |  *
     |  * Keys are ISAd[entry].  Scanning starts at middle, so the entries in
     |  * [first, middle) are not examined and are treated as already equal to
     |  * v.  Entries equal to v are first collected at both ends of the range
     |  * and then swapped into the centre.  On return
     |  *   [first, *pa)  holds keys below v,
     |  *   [*pa, *pb)    holds keys equal to v,
     |  *   [*pb, last)   holds keys above v.
     |  * SWAP is a macro defined outside this view (presumably it uses the
     |  * local t as scratch -- confirm).
     |  */
1049 | static INLINE
1050 | void
1051 | tr_partition(const int *ISAd,
1052 |              int *first, int *middle, int *last,
1053 |              int **pa, int **pb, int v) {
1054 |   int *a, *b, *c, *d, *e, *f;
1055 |   int t, s;
1056 |   int x = 0;
1057 | 
1058 |   for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { } /* skip leading keys equal to v */
1059 |   if(((a = b) < last) && (x < v)) {
1060 |     for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
1061 |       if(x == v) { SWAP(*b, *a); ++a; }
1062 |     }
1063 |   }
1064 |   for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { } /* skip trailing keys equal to v */
1065 |   if((b < (d = c)) && (x > v)) {
1066 |     for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
1067 |       if(x == v) { SWAP(*c, *d); --d; }
1068 |     }
1069 |   }
1070 |   for(; b < c;) {
1071 |     SWAP(*b, *c);
1072 |     for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
1073 |       if(x == v) { SWAP(*b, *a); ++a; }
1074 |     }
1075 |     for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
1076 |       if(x == v) { SWAP(*c, *d); --d; }
1077 |     }
1078 |   }
1079 | 
1080 |   if(a <= d) { /* some key differs from v: move the equal blocks to the centre */
1081 |     c = b - 1;
1082 |     if((s = a - first) > (t = b - a)) { s = t; }
1083 |     for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
1084 |     if((s = d - c) > (t = last - d - 1)) { s = t; }
1085 |     for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
1086 |     first += (b - a), last -= (d - c);
1087 |   }
1088 |   *pa = first, *pb = last;
1089 | }
1090 | /* Fills the middle partition [a, b) of [first, last) from its already
     |  * sorted neighbours and records the new ranks.
     |  *
     |  * v = b - SA - 1 is the rank value that marks members of the middle
     |  * partition.  The first loop walks forward from first; for every entry
     |  * *c, s = *c - depth is appended after a - 1 if s >= 0 and ISA[s] == v,
     |  * and ISA[s] becomes the index of the slot it was written to.  The loop
     |  * bound d grows as entries are appended, so entries just written are
     |  * themselves scanned.  The second loop does the same backwards from
     |  * last - 1, filling downwards from b until it meets the first part.
     |  */
1091 | static
1092 | void
1093 | tr_copy(int *ISA, const int *SA,
1094 |         int *first, int *a, int *b, int *last,
1095 |         int depth) {
1096 |   /* sort suffixes of middle partition
1097 |      by using sorted order of suffixes of left and right partition. */
1098 |   int *c, *d, *e;
1099 |   int s, v;
1100 | 
1101 |   v = b - SA - 1;
1102 |   for(c = first, d = a - 1; c <= d; ++c) {
1103 |     if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1104 |       *++d = s;
1105 |       ISA[s] = d - SA;
1106 |     }
1107 |   }
1108 |   for(c = last - 1, e = d + 1, d = b; e < d; --c) {
1109 |     if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1110 |       *--d = s;
1111 |       ISA[s] = d - SA;
1112 |     }
1113 |   }
1114 | }
1115 | /* Variant of the copy step above for neighbours that are only partially
     |  * sorted.
     |  *
     |  * The middle partition [a, b) is filled exactly as in tr_copy(), but an
     |  * entry does not automatically get its own slot index as rank: entries
     |  * whose ISA[s + depth] value equals that of the previously placed entry
     |  * share one rank (newrank), which changes only when that value changes.
     |  * Between the forward and the backward fill, the middle loop walks from
     |  * the last entry placed so far down to first and rewrites ISA[*e] so
     |  * that each run of equal ranks carries the index of the run's last
     |  * slot.
     |  */
1116 | static
1117 | void
1118 | tr_partialcopy(int *ISA, const int *SA,
1119 |                int *first, int *a, int *b, int *last,
1120 |                int depth) {
1121 |   int *c, *d, *e;
1122 |   int s, v;
1123 |   int rank, lastrank, newrank = -1;
1124 | 
1125 |   v = b - SA - 1; /* rank value that marks members of the middle partition */
1126 |   lastrank = -1;
1127 |   for(c = first, d = a - 1; c <= d; ++c) { /* forward fill from the left neighbour */
1128 |     if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1129 |       *++d = s;
1130 |       rank = ISA[s + depth];
1131 |       if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
1132 |       ISA[s] = newrank;
1133 |     }
1134 |   }
1135 | 
1136 |   lastrank = -1;
1137 |   for(e = d; first <= e; --e) { /* give each run of equal ranks the index of its last slot */
1138 |     rank = ISA[*e];
1139 |     if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
1140 |     if(newrank != rank) { ISA[*e] = newrank; }
1141 |   }
1142 | 
1143 |   lastrank = -1;
1144 |   for(c = last - 1, e = d + 1, d = b; e < d; --c) { /* backward fill from the right neighbour */
1145 |     if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1146 |       *--d = s;
1147 |       rank = ISA[s + depth];
1148 |       if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
1149 |       ISA[s] = newrank;
1150 |     }
1151 |   }
1152 | }
1153 | 
/* tr_introsort: orders the range [first, last) of SA by the keys ISAd[*p],
   using an explicit work stack (stack[], ssize) instead of recursion.
   The variable limit doubles as a state tag for the current range:
     limit >= 0  remaining partitioning depth; insertion sort is used for
                 short ranges and heapsort once the depth is used up,
     limit == -1 tandem repeat partition,
     limit == -2 tandem repeat copy,
     otherwise   (-3 is the value assigned in this function) the range has
                 been ordered and only its ranks/markers are processed.
   incr is ISAd - ISA at entry and is the step by which ISAd is advanced.
   budget is consulted through trbudget_check() before work is scheduled at
   the next ISAd; when it refuses, the stack entry at trlink (if any) gets
   its d field set to -1.
   NOTE(review): the for(;;) loop has no exit of its own; it presumably
   returns from inside STACK_POP5 when the stack is empty - confirm against
   the macro definition. The local t is not referenced directly here and is
   presumably used by the SWAP macro. */
1154 | static
1155 | void
1156 | tr_introsort(int *ISA, const int *ISAd,
1157 |              int *SA, int *first, int *last,
1158 |              trbudget_t *budget) {
1159 | #define STACK_SIZE TR_STACKSIZE
1160 |   struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE];
1161 |   int *a, *b, *c;
1162 |   int t;
1163 |   int v, x = 0;
1164 |   int incr = ISAd - ISA;
1165 |   int limit, next;
1166 |   int ssize, trlink = -1;
1167 | 
1168 |   for(ssize = 0, limit = tr_ilg(last - first);;) {
1169 | 
1170 |     if(limit < 0) {
1171 |       if(limit == -1) {
1172 |         /* tandem repeat partition */
1173 |         tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
1174 | 
1175 |         /* update ranks */
1176 |         if(a < last) {
1177 |           for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
1178 |         }
1179 |         if(b < last) {
1180 |           for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
1181 |         }
1182 | 
1183 |         /* push */
1184 |         if(1 < (b - a)) {
1185 |           STACK_PUSH5(NULL, a, b, 0, 0);
1186 |           STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
1187 |           trlink = ssize - 2;
1188 |         }
1189 |         if((a - first) <= (last - b)) {
1190 |           if(1 < (a - first)) {
1191 |             STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
1192 |             last = a, limit = tr_ilg(a - first);
1193 |           } else if(1 < (last - b)) {
1194 |             first = b, limit = tr_ilg(last - b);
1195 |           } else {
1196 |             STACK_POP5(ISAd, first, last, limit, trlink);
1197 |           }
1198 |         } else {
1199 |           if(1 < (last - b)) {
1200 |             STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
1201 |             first = b, limit = tr_ilg(last - b);
1202 |           } else if(1 < (a - first)) {
1203 |             last = a, limit = tr_ilg(a - first);
1204 |           } else {
1205 |             STACK_POP5(ISAd, first, last, limit, trlink);
1206 |           }
1207 |         }
1208 |       } else if(limit == -2) {
1209 |         /* tandem repeat copy */
/* The entry below the current one holds the middle range [a, b) pushed by the
   tandem repeat partition; its d field selects tr_copy (0) or
   tr_partialcopy (anything else). */
1210 |         a = stack[--ssize].b, b = stack[ssize].c;
1211 |         if(stack[ssize].d == 0) {
1212 |           tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
1213 |         } else {
1214 |           if(0 <= trlink) { stack[trlink].d = -1; }
1215 |           tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
1216 |         }
1217 |         STACK_POP5(ISAd, first, last, limit, trlink);
1218 |       } else {
1219 |         /* sorted partition */
/* Leading non-negative entries each get their own index as rank. A following
   run of negative entries is un-complemented and handled as one group that
   ends at the first non-negative entry. */
1220 |         if(0 <= *first) {
1221 |           a = first;
1222 |           do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
1223 |           first = a;
1224 |         }
1225 |         if(first < last) {
1226 |           a = first; do { *a = ~*a; } while(*++a < 0);
1227 |           next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
1228 |           if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
1229 | 
1230 |           /* push */
1231 |           if(trbudget_check(budget, a - first)) {
1232 |             if((a - first) <= (last - a)) {
1233 |               STACK_PUSH5(ISAd, a, last, -3, trlink);
1234 |               ISAd += incr, last = a, limit = next;
1235 |             } else {
1236 |               if(1 < (last - a)) {
1237 |                 STACK_PUSH5(ISAd + incr, first, a, next, trlink);
1238 |                 first = a, limit = -3;
1239 |               } else {
1240 |                 ISAd += incr, last = a, limit = next;
1241 |               }
1242 |             }
1243 |           } else {
1244 |             if(0 <= trlink) { stack[trlink].d = -1; }
1245 |             if(1 < (last - a)) {
1246 |               first = a, limit = -3;
1247 |             } else {
1248 |               STACK_POP5(ISAd, first, last, limit, trlink);
1249 |             }
1250 |           }
1251 |         } else {
1252 |           STACK_POP5(ISAd, first, last, limit, trlink);
1253 |         }
1254 |       }
1255 |       continue;
1256 |     }
1257 | 
/* Short range: insertion sort, then treat it as a sorted partition. */
1258 |     if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
1259 |       tr_insertionsort(ISAd, first, last);
1260 |       limit = -3;
1261 |       continue;
1262 |     }
1263 | 
/* Depth used up: heapsort, then complement every entry whose key equals the
   key of its right neighbour group head so equal-key runs are marked. */
1264 |     if(limit-- == 0) {
1265 |       tr_heapsort(ISAd, first, last - first);
1266 |       for(a = last - 1; first < a; a = b) {
1267 |         for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
1268 |       }
1269 |       limit = -3;
1270 |       continue;
1271 |     }
1272 | 
1273 |     /* choose pivot */
1274 |     a = tr_pivot(ISAd, first, last);
1275 |     SWAP(*first, *a);
1276 |     v = ISAd[*first];
1277 | 
1278 |     /* partition */
1279 |     tr_partition(ISAd, first, first + 1, last, &a, &b, v);
/* [a, b) is the middle range reported by tr_partition. When it does not cover
   the whole range, the three parts are scheduled; the middle part is always
   scheduled with ISAd + incr and limit next. */
1280 |     if((last - first) != (b - a)) {
1281 |       next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
1282 | 
1283 |       /* update ranks */
1284 |       for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
1285 |       if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
1286 | 
1287 |       /* push */
1288 |       if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
1289 |         if((a - first) <= (last - b)) {
1290 |           if((last - b) <= (b - a)) {
1291 |             if(1 < (a - first)) {
1292 |               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1293 |               STACK_PUSH5(ISAd, b, last, limit, trlink);
1294 |               last = a;
1295 |             } else if(1 < (last - b)) {
1296 |               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1297 |               first = b;
1298 |             } else {
1299 |               ISAd += incr, first = a, last = b, limit = next;
1300 |             }
1301 |           } else if((a - first) <= (b - a)) {
1302 |             if(1 < (a - first)) {
1303 |               STACK_PUSH5(ISAd, b, last, limit, trlink);
1304 |               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1305 |               last = a;
1306 |             } else {
1307 |               STACK_PUSH5(ISAd, b, last, limit, trlink);
1308 |               ISAd += incr, first = a, last = b, limit = next;
1309 |             }
1310 |           } else {
1311 |             STACK_PUSH5(ISAd, b, last, limit, trlink);
1312 |             STACK_PUSH5(ISAd, first, a, limit, trlink);
1313 |             ISAd += incr, first = a, last = b, limit = next;
1314 |           }
1315 |         } else {
1316 |           if((a - first) <= (b - a)) {
1317 |             if(1 < (last - b)) {
1318 |               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1319 |               STACK_PUSH5(ISAd, first, a, limit, trlink);
1320 |               first = b;
1321 |             } else if(1 < (a - first)) {
1322 |               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1323 |               last = a;
1324 |             } else {
1325 |               ISAd += incr, first = a, last = b, limit = next;
1326 |             }
1327 |           } else if((last - b) <= (b - a)) {
1328 |             if(1 < (last - b)) {
1329 |               STACK_PUSH5(ISAd, first, a, limit, trlink);
1330 |               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1331 |               first = b;
1332 |             } else {
1333 |               STACK_PUSH5(ISAd, first, a, limit, trlink);
1334 |               ISAd += incr, first = a, last = b, limit = next;
1335 |             }
1336 |           } else {
1337 |             STACK_PUSH5(ISAd, first, a, limit, trlink);
1338 |             STACK_PUSH5(ISAd, b, last, limit, trlink);
1339 |             ISAd += incr, first = a, last = b, limit = next;
1340 |           }
1341 |         }
1342 |       } else {
/* Middle part is trivial or the budget refused it: only the outer parts are
   continued, at the current ISAd. */
1343 |         if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
1344 |         if((a - first) <= (last - b)) {
1345 |           if(1 < (a - first)) {
1346 |             STACK_PUSH5(ISAd, b, last, limit, trlink);
1347 |             last = a;
1348 |           } else if(1 < (last - b)) {
1349 |             first = b;
1350 |           } else {
1351 |             STACK_POP5(ISAd, first, last, limit, trlink);
1352 |           }
1353 |         } else {
1354 |           if(1 < (last - b)) {
1355 |             STACK_PUSH5(ISAd, first, a, limit, trlink);
1356 |             first = b;
1357 |           } else if(1 < (a - first)) {
1358 |             last = a;
1359 |           } else {
1360 |             STACK_POP5(ISAd, first, last, limit, trlink);
1361 |           }
1362 |         }
1363 |       }
1364 |     } else {
/* The middle range spans the whole range: retry the same range at the next
   ISAd if the budget allows it, otherwise give the range up for this call. */
1365 |       if(trbudget_check(budget, last - first)) {
1366 |         limit = tr_ilg(last - first), ISAd += incr;
1367 |       } else {
1368 |         if(0 <= trlink) { stack[trlink].d = -1; }
1369 |         STACK_POP5(ISAd, first, last, limit, trlink);
1370 |       }
1371 |     }
1372 |   }
1373 | #undef STACK_SIZE
1374 | }
1375 | 
1376 | 
1377 | 
1378 | /*---------------------------------------------------------------------------*/
1379 | 
1380 | /* Tandem repeat sort */
/* Drives tr_introsort over SA[0..n) until every group is resolved.
   ISA   rank array, read and updated through tr_introsort.
   SA    array being ordered; a negative entry t stands for a run of -t
         entries that is skipped.
   n     number of entries in SA.
   depth initial distance between ISA and ISAd; it doubles after every pass
         (ISAd += ISAd - ISA).
   The outer loop ends when *SA <= -n or when a pass reports no unsorted
   entries (budget.count stayed 0 for every group handed to tr_introsort). */
1381 | static
1382 | void
1383 | trsort(int *ISA, int *SA, int n, int depth) {
1384 |   int *ISAd;
1385 |   int *first, *last;
1386 |   trbudget_t budget;
1387 |   int t, skip, unsorted;
1388 | 
1389 |   trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
1390 | /*  trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
1391 |   for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
1392 |     first = SA;
1393 |     skip = 0;
1394 |     unsorted = 0;
1395 |     do {
/* skip accumulates the (negative) length of the run of resolved entries that
   ends at first; it is written to the start of that run, first + skip, as
   soon as an unresolved group follows or the scan ends. */
1396 |       if((t = *first) < 0) { first -= t; skip += t; }
1397 |       else {
1398 |         if(skip != 0) { *(first + skip) = skip; skip = 0; }
/* The group containing *first ends at the slot whose index is ISA[t]. */
1399 |         last = SA + ISA[t] + 1;
1400 |         if(1 < (last - first)) {
1401 |           budget.count = 0;
1402 |           tr_introsort(ISA, ISAd, SA, first, last, &budget);
1403 |           if(budget.count != 0) { unsorted += budget.count; }
1404 |           else { skip = first - last; }
1405 |         } else if((last - first) == 1) {
1406 |           skip = -1;
1407 |         }
1408 |         first = last;
1409 |       }
1410 |     } while(first < (SA + n));
1411 |     if(skip != 0) { *(first + skip) = skip; }
1412 |     if(unsorted == 0) { break; }
1413 |   }
1414 | }
1415 | 
1416 | 
1417 | /*---------------------------------------------------------------------------*/
1418 | 
1419 | /* Sorts suffixes of type B*. */
/* T        input text, n bytes.
   SA       output/work array; also provides the PAb, ISAb and buf regions
            used below.
   bucket_A,
   bucket_B counter arrays, only accessed through the BUCKET_A, BUCKET_B and
            BUCKET_BSTAR macros (defined outside this function).
   n        length of T.
   Returns m, the number of type B* suffixes found by the first scan. */
1420 | static
1421 | int
1422 | sort_typeBstar(const unsigned char *T, int *SA,
1423 |                int *bucket_A, int *bucket_B,
1424 |                int n) {
1425 |   int *PAb, *ISAb, *buf;
1426 | #ifdef _OPENMP
1427 |   int *curbuf;
1428 |   int l;
1429 | #endif
1430 |   int i, j, k, t, m, bufsize;
1431 |   int c0, c1;
1432 | #ifdef _OPENMP
1433 |   int d0, d1;
1434 |   int tmp;
1435 | #endif
1436 | 
1437 |   /* Count the number of occurrences of the first one or two characters of each
1438 |      type A, B and B* suffix. Moreover, store the beginning position of all
1439 |      type B* suffixes into the array SA. */
1440 |   for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
1441 |     /* type A suffix. */
1442 |     do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
1443 |     if(0 <= i) {
1444 |       /* type B* suffix. */
1445 |       ++BUCKET_BSTAR(c0, c1);
1446 |       SA[--m] = i;
1447 |       /* type B suffix. */
1448 |       for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
1449 |         ++BUCKET_B(c0, c1);
1450 |       }
1451 |     }
1452 |   }
/* The B* positions were stored downward from SA[n - 1]; m now becomes their
   count. */
1453 |   m = n - m;
1454 | /*
1455 | note:
1456 |   A type B* suffix is lexicographically smaller than a type B suffix that
1457 |   begins with the same first two characters.
1458 | */
1459 | 
1460 |   /* Calculate the index of start/end point of each bucket. */
1461 |   for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
1462 |     t = i + BUCKET_A(c0);
1463 |     BUCKET_A(c0) = i + j; /* start point */
1464 |     i = t + BUCKET_B(c0, c0);
1465 |     for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
1466 |       j += BUCKET_BSTAR(c0, c1);
1467 |       BUCKET_BSTAR(c0, c1) = j; /* end point */
1468 |       i += BUCKET_B(c0, c1);
1469 |     }
1470 |   }
1471 | 
1472 |   if(0 < m) {
1473 |     /* Sort the type B* suffixes by their first two characters. */
/* PAb is the list of B* positions at the top of SA; ISAb starts at SA + m. */
1474 |     PAb = SA + n - m; ISAb = SA + m;
1475 |     for(i = m - 2; 0 <= i; --i) {
1476 |       t = PAb[i], c0 = T[t], c1 = T[t + 1];
1477 |       SA[--BUCKET_BSTAR(c0, c1)] = i;
1478 |     }
1479 |     t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
1480 |     SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
1481 | 
1482 |     /* Sort the type B* substrings using sssort. */
1483 | #ifdef _OPENMP
/* OpenMP build: the free area is split into one buffer per thread; each
   thread picks the next bucket with more than one entry inside the critical
   section and sorts it outside of it. */
1484 |     tmp = omp_get_max_threads();
1485 |     buf = SA + m, bufsize = (n - (2 * m)) / tmp;
1486 |     c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
1487 | #pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp)
1488 |     {
1489 |       tmp = omp_get_thread_num();
1490 |       curbuf = buf + tmp * bufsize;
1491 |       k = 0;
1492 |       for(;;) {
1493 |         #pragma omp critical(sssort_lock)
1494 |         {
1495 |           if(0 < (l = j)) {
1496 |             d0 = c0, d1 = c1;
1497 |             do {
1498 |               k = BUCKET_BSTAR(d0, d1);
1499 |               if(--d1 <= d0) {
1500 |                 d1 = ALPHABET_SIZE - 1;
1501 |                 if(--d0 < 0) { break; }
1502 |               }
1503 |             } while(((l - k) <= 1) && (0 < (l = k)));
1504 |             c0 = d0, c1 = d1, j = k;
1505 |           }
1506 |         }
1507 |         if(l == 0) { break; }
1508 |         sssort(T, PAb, SA + k, SA + l,
1509 |                curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
1510 |       }
1511 |     }
1512 | #else
/* Serial build: buckets are visited from the highest (c0, c1) pair downward;
   only buckets holding more than one entry are passed to sssort. */
1513 |     buf = SA + m, bufsize = n - (2 * m);
1514 |     for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
1515 |       for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
1516 |         i = BUCKET_BSTAR(c0, c1);
1517 |         if(1 < (j - i)) {
1518 |           sssort(T, PAb, SA + i, SA + j,
1519 |                  buf, bufsize, 2, n, *(SA + i) == (m - 1));
1520 |         }
1521 |       }
1522 |     }
1523 | #endif
1524 | 
1525 |     /* Compute ranks of type B* substrings. */
/* Runs of non-negative entries each get their own index as rank and are
   replaced by a negative run length at the start of the run; runs of negative
   entries are un-complemented and share the rank j of their last slot. */
1526 |     for(i = m - 1; 0 <= i; --i) {
1527 |       if(0 <= SA[i]) {
1528 |         j = i;
1529 |         do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
1530 |         SA[i + 1] = i - j;
1531 |         if(i <= 0) { break; }
1532 |       }
1533 |       j = i;
1534 |       do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
1535 |       ISAb[SA[i]] = j;
1536 |     }
1537 | 
1538 |     /* Construct the inverse suffix array of type B* suffixes using trsort. */
1539 |     trsort(ISAb, SA, m, 1);
1540 | 
1541 |     /* Set the sorted order of type B* suffixes. */
/* T is rescanned from the end exactly as in the counting pass; the position t
   is stored complemented unless t == 0 or more than one position was consumed
   by the type B run before it. */
1542 |     for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
1543 |       for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
1544 |       if(0 <= i) {
1545 |         t = i;
1546 |         for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
1547 |         SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
1548 |       }
1549 |     }
1550 | 
1551 |     /* Calculate the index of start/end point of each bucket. */
1552 |     BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
1553 |     for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
1554 |       i = BUCKET_A(c0 + 1) - 1;
1555 |       for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
1556 |         t = i - BUCKET_B(c0, c1);
1557 |         BUCKET_B(c0, c1) = i; /* end point */
1558 | 
1559 |         /* Move all type B* suffixes to the correct position. */
1560 |         for(i = t, j = BUCKET_BSTAR(c0, c1);
1561 |             j <= k;
1562 |             --i, --k) { SA[i] = SA[k]; }
1563 |       }
1564 |       BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
1565 |       BUCKET_B(c0, c0) = i; /* end point */
1566 |     }
1567 |   }
1568 | 
1569 |   return m;
1570 | }
1571 | 
1572 | /* Constructs the suffix array by using the sorted order of type B* suffixes. */
/* T        input text, n bytes.
   SA       array holding the sorted B* suffixes on entry; rewritten in place.
   bucket_A,
   bucket_B bucket boundaries, accessed through the BUCKET_* macros and
            updated while entries are placed.
   n        length of T.
   m        number of type B* suffixes; the first phase is skipped when it
            is 0.
   Entries are stored bit-complemented (~s) while they act as markers; both
   scans flip an entry back with *p = ~s when it is not positive. */
1573 | static
1574 | void
1575 | construct_SA(const unsigned char *T, int *SA,
1576 |              int *bucket_A, int *bucket_B,
1577 |              int n, int m) {
1578 |   int *i, *j, *k;
1579 |   int s;
1580 |   int c0, c1, c2;
1581 | 
1582 |   if(0 < m) {
1583 |     /* Construct the sorted order of type B suffixes by using
1584 |        the sorted order of type B* suffixes. */
1585 |     for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
1586 |       /* Scan the suffix array from right to left. */
1587 |       for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
1588 |           j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
1589 |           i <= j;
1590 |           --j) {
1591 |         if(0 < (s = *j)) {
1592 |           *j = ~s;
/* The predecessor position s - 1 is placed at the write pointer k of bucket
   (c0, c1); it is stored complemented when the character before it is larger
   than c0. k is re-loaded whenever the leading character c0 changes. */
1593 |           c0 = T[--s];
1594 |           if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1595 |           if(c0 != c2) {
1596 |             if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1597 |             k = SA + BUCKET_B(c2 = c0, c1);
1598 |           }
1599 |           *k-- = s;
1600 |         } else*j = ~s;
1601 |       }
1602 |     }
1603 |   }
1604 | 
1605 |   /* Construct the suffix array by using
1606 |      the sorted order of type B suffixes. */
/* The last suffix (position n - 1) is placed first, at the start of the
   bucket of its character; it is stored complemented when T[n - 2] is
   smaller than that character. */
1607 |   k = SA + BUCKET_A(c2 = T[n - 1]);
1608 |   *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
1609 |   /* Scan the suffix array from left to right. */
1610 |   for(i = SA, j = SA + n; i < j; ++i) {
1611 |     if(0 < (s = *i)) {
1612 |       c0 = T[--s];
1613 |       if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
1614 |       if(c0 != c2) {
1615 |         BUCKET_A(c2) = k - SA;
1616 |         k = SA + BUCKET_A(c2 = c0);
1617 |       }
1618 |       *k++ = s;
1619 |     } else*i = ~s;
1620 |   }
1621 | }
1622 | /*---------------------------------------------------------------------------*/
1623 | 
1624 | /*- Function -*/
1625 | 
1626 | int
1627 | divsufsort(const unsigned char *T, int *SA, int *bucket, int n) {
1628 |   int *bucket_A=bucket, *bucket_B=bucket+BUCKET_A_SIZE;
1629 |   int m;
1630 |   int err = 0;
1631 | 
1632 |   /* Check arguments. */
1633 |   if((T == NULL) || (SA == NULL) || (n < 0))return-1;
1634 |   if(n == 0)return 0;
1635 |   if(n == 1)return SA[0]=0;
1636 |   if(n == 2){SA[m=T[0]<T[1]]=1;return SA[m^1]=0;}
1637 | 
1638 |   /* Suffixsort. */
1639 |   if(bucket)
1640 |     m = sort_typeBstar(T, SA, bucket_A, bucket_B, n),
1641 |     construct_SA(T, SA, bucket_A, bucket_B, n, m);
1642 |   else err = -2;
1643 |   return err;
1644 | }


--------------------------------------------------------------------------------
/divsufsort.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * divsufsort.h for libdivsufsort-lite
 3 |  * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
 4 |  *
 5 |  * Permission is hereby granted, free of charge, to any person
 6 |  * obtaining a copy of this software and associated documentation
 7 |  * files (the "Software"), to deal in the Software without
 8 |  * restriction, including without limitation the rights to use,
 9 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the
11 |  * Software is furnished to do so, subject to the following
12 |  * conditions:
13 |  *
14 |  * The above copyright notice and this permission notice shall be
15 |  * included in all copies or substantial portions of the Software.
16 |  *
17 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 |  * OTHER DEALINGS IN THE SOFTWARE.
25 |  */
26 | 
27 | #ifndef _DIVSUFSORT_H
28 | #define _DIVSUFSORT_H 1
29 | 
30 | #ifdef __cplusplus
31 | extern "C" {
32 | #endif /* __cplusplus */
33 | 
34 | 
35 | /*- Prototypes -*/
36 | 
37 | /**
38 |  * Constructs the suffix array of a given string.
39 |  * @param T[0..n-1] The input string.
40 |  * @param SA[0..n-1] The output array of suffixes.
41 |  * @param bucket Caller-supplied work area for the bucket counters; must not be NULL when n > 2.
42 |  * @param n The length of the given string.
43 |  * @return 0 if no error occurred, -1 if T or SA is NULL or n is negative, -2 if bucket is NULL.
44 |  */
45 | int divsufsort(const unsigned char *T, int *SA, int *bucket, int n);
46 | #ifdef __cplusplus
47 | } /* extern "C" */
48 | #endif /* __cplusplus */
49 | 
50 | #endif /* _DIVSUFSORT_H */


--------------------------------------------------------------------------------
/e8.h:
--------------------------------------------------------------------------------
 #include <stdint.h>
 #include <string.h>
 2 | 
 /*
  * Forward branch-operand filter, applied in place to in_buf[0..n-1].
  *
  * A byte equal to 0xE8 or 0xE9, or the two-byte sequence 0x0F 0x8x, is
  * treated as an opcode that is followed by a 32-bit operand (presumably the
  * x86 CALL/JMP/Jcc rel32 encodings - the code only looks at the byte
  * values). With i being the offset of the operand itself:
  *   - an operand in [-i, n-i) has i added to it, giving a value in [0, n);
  *   - an operand in [n-i, n) has n subtracted, giving a value in [-i, -1];
  *   - any other operand is left as it is.
  * e8back() applies the inverse mapping.
  *
  * The operand is read and written with memcpy in the host byte order: it
  * can sit at any byte offset, so going through an int32_t pointer would be
  * a misaligned (and aliasing-violating) access.
  *
  * Buffers shorter than 6 bytes are left untouched.
  */
 void e8(uint8_t *in_buf, int32_t n) {
   int32_t i;
   int32_t v;
   for(i=0; i<n-5;) {
     uint8_t b = in_buf[i];
     if ((b == 0xF) && (in_buf[i+1] & 0xF0) == 0x80) {
       /* two-byte opcode: skip the prefix and handle it like 0xE8 */
       i++;
       b = 0xe8;
     }
     i++;
     b &= 0xFE; /* folds 0xE9 onto 0xE8 */
     if (b == 0xe8) {
        memcpy(&v, in_buf+i, sizeof v);
        if (v >= -i && v < n-i) {
          v += i;
        } else if ( v >= n-i && v < n ) {
          v -= n; // to [-i,-1]
        }
        memcpy(in_buf+i, &v, sizeof v);
        i+=4;
     }
   }
 }
25 | 
/*
 * Inverse of the forward branch-operand filter, applied in place to
 * buf[0..n-1].
 *
 * Opcode detection is the same as in the forward direction: a byte equal to
 * 0xE8 or 0xE9, or the two-byte sequence 0x0F 0x8x, is followed by a 32-bit
 * operand. With i being the offset of the operand itself:
 *   - an operand in [-i, 0) has n added to it;
 *   - an operand in [0, n) has i subtracted from it;
 *   - any other operand is left as it is.
 *
 * The operand is read and written with memcpy in the host byte order: it can
 * sit at any byte offset, so going through an int32_t pointer would be a
 * misaligned (and aliasing-violating) access.
 *
 * Buffers shorter than 6 bytes are left untouched.
 */
void e8back(uint8_t *buf,int32_t n) {
  int32_t i;
  int32_t v;
  for(i=0; i<n-5;) {
    uint8_t b = buf[i];
    if ((b == 0xF) && (buf[i+1] & 0xF0) == 0x80) {
      /* two-byte opcode: skip the prefix and handle it like 0xE8 */
      i++;
      b = 0xe8;
    }
    b &= 0xFE; /* folds 0xE9 onto 0xE8 */
    i++;

    if (b == 0xe8) {
       memcpy(&v, buf+i, sizeof v);
       if (v >= -i && v < 0) {
         v += n;
       } else if ( v >= 0 && v < n ) {
         v -= i;
       }
       memcpy(buf+i, &v, sizeof v);
       i+=4;
    }
  }
}
49 | 


--------------------------------------------------------------------------------
/lzoma.h:
--------------------------------------------------------------------------------
 /* Format identification constants. */
 #define AuthorID 0xA1Ef
 #define AlgoID "LZOM"
 #define Version 0x00

 /* Flag bits for a block (the two top bits of a 32-bit value). */
 #define BLOCK_STORED 0x80000000
 #define BLOCK_LAST 0x40000000

 /* History is 32 KiB shifted left by dict_size; a block is 1/16 of it.
    The argument is parenthesized so that any expression can be passed. */
 #define HISTORY_SIZE(dict_size) (32*1024<<(dict_size))
 #define BLOCK_SIZE(dict_size) (HISTORY_SIZE(dict_size) >> 4)

 /* Tuning constants of the offset/length coding. lzmagic is a bit mask that
    is tested against the doubling counter of the offset coder; whenever a
    masked bit is hit, lzshift() scales the current threshold by 9/8.
    lzmagic and the macro arguments are parenthesized so the macros expand
    to what they read as, whatever operators surround them. */
 #define longlen 5400
 #define hugelen 0x060000
 #define breaklz 512
 #define lzmagic (0x002FFe00*2)
 #define lzshift(top) ((9*(top))>>3)

 /* Initial threshold of the offset coder: 60 up to a total of 400000,
    50 above that. */
 #define lzlow(total) (((total) <= 400000) ? 60 :50)
18 | 
19 | 


--------------------------------------------------------------------------------
/pack.c:
--------------------------------------------------------------------------------
   1 | // test file compression based on lzoma algorithm
   2 | // (c) 2015,2016 Alexandr Efimov
   3 | // License: GPL 2.0 or later
   4 | // Uses divsufsort library for faster initialization (thanks to xezz for suggestion), see divsufsort.h for its license details.
   5 | //
   6 | // Discussion thread: http://encode.ru/threads/2280-LZOMA
   7 | //
   8 | // Notes:
   9 | //
  10 | // Pros:
  11 | // Compression ratio is very good (much higher than lzo, ucl, gzip).
  12 | // Decompression speed is very high (faster than gzip, much faster than bzip2,lzham, lzma,xz)
  13 | // tiny decompressor code (asm version of decompress function less than 400 bytes)
  14 | //
  15 | // compressed data format is somewhere between lzo and lzma
  16 | // uses static encoding and byte-aligned literals and byte-aligned parts of match offset for decompression speed
  17 | //
  18 | // Cons:
  19 | // compressor is VERY slow. It is possible to implement faster compressor at the cost of some compression ratio.
  20 | // may be it is possible to adapt lzma compressor code.
  21 | //
  22 | // Other:
  23 | // Code of both compression/decompression utils is experimental.
  24 | // compressed data format is not stable yet.
  25 | // compressor source code is more like a ground for experiments, not a finished product yet.
  26 | // some commented out code was intended for experiments with Reduced-offset LZ, RLE step before LZ, LZX-style encoding of matches, various heuristics, etc.
  27 | //
  28 | #include <stdio.h>
  29 | #include <stdlib.h>
  30 | #include"divsufsort.h"
  31 | 
  32 | #include "lzoma.h"
  33 | #include "bpe.h"
  34 | #include "e8.h"
  35 | 
  36 | #define MINOLEN 1
  37 | #define MINLZ 2
  38 | 
  /* Compression-level settings. levels[] holds nine presets of three tuning
     values each; row 6 is marked as the default below.
     NOTE(review): the three columns presumably feed level, short_match_level
     and match_level - confirm where the table is read. */
  39 | int level, short_match_level, match_level;
  40 |   int levels[9][3]= {
  41 |     {1,1,2},
  42 |     {1,2,3},
  43 |     {2,3,5},
  44 |     {3,5,15},
  45 |     {3,7,30},
  46 |     {3,10,100},
  47 |     {3,20,200}, // default level -7
  48 |     {3,40,500},
  49 |     {3,100,1000}
  50 |   };
  51 | int verbose = 0;
  52 | 
  53 | int dict_size, history_size, block_size;
  54 | 
  /* Optional dump streams. Every writer checks the pointer for NULL first
     (fdist in putenc, flen in putenc_l, flzlit and folz in put_lz), so a NULL
     stream simply disables that dump. */
  55 | FILE *flzlit=NULL;
  56 | FILE *flit=NULL;
  57 | FILE *folz=NULL;
  58 | FILE *flen=NULL;
  59 | FILE *fdist=NULL;
  60 | #ifdef EXPERIMENTS
  61 | // These streams are used for experiments only.
  62 | FILE *test=NULL;
  63 | FILE *test2=NULL;
  64 | FILE *test3=NULL;
  65 | #endif
  66 | 
  /* out_buf is not a buffer of its own: it is the rle buffer viewed as bytes. */
  67 | uint32_t *rle;
  68 | uint8_t *in_buf; /* text to be encoded */
  69 | //uint8_t *out_buf; - shared with rle
  70 | #define out_buf ((uint8_t *)rle)
  71 | 
  /* Per-position record behind the cache()/best_ofs()/best_len()/use_olz()/
     olz_len()/use_olz2()/olz_len2() accessor macros, which index the `state`
     buffer by position minus in_offset. */
  72 | typedef struct {
  73 |   int32_t cache; /* best possible result in bits if we start with lz or letter code */
  74 |   int32_t best_ofs; /* best way to start, assuming we do not start with OLD OFFSET code */
  75 |   int32_t best_len; /* best way to start - match len, assuming we do not start with OLD OFFSET code */
  76 |   int32_t use_olz; /* if not zero, repeat same offset after this number of literals */
  77 |   int32_t olz_len; /* length of repeated lz */
  78 |   int32_t use_olz2; /* if not zero, repeat same offset after this number of literals after first repeat */
  79 |   int32_t olz_len2; /* length of the second repeated lz */
  80 | } FutureState;
  81 | 
  /* Per-position record behind the same()/samelen()/sorted_len()/
     sorted_prev()/sorted_next() accessor macros, which index the `past_state`
     buffer directly by position. */
  82 | typedef struct {
  83 |   int32_t same; /* pointer to previous match of at least 2 bytes. for checking nearby short matches */
  84 |   int32_t samelen; /* length of match between this and previous string pointed by "same" */
  85 | 
  86 |   // sorted links, used to quickly check long matches starting from the longest match
  87 |   int32_t sorted_len;
  88 |   int32_t sorted_prev;
  89 |   int32_t sorted_next;
  90 | } PastState;
  91 | 
  // PastState and FutureState share the same memory buffer
  // sizeof(PastState) should be < sizeof(FutureState)
  void *state;
  void *past_state;

  #define sorted ((int32_t *)((uint8_t *)state)) // used very early in initialization

  // FutureState accessors: `state` is indexed by position minus in_offset.
  // The argument and the whole expansion are parenthesized so that any
  // expression can be passed and the result can be used inside any expression.
  #define cache(i) (((FutureState *)state)[(i)-in_offset].cache)
  #define best_ofs(i) (((FutureState *)state)[(i)-in_offset].best_ofs)
  #define best_len(i) (((FutureState *)state)[(i)-in_offset].best_len)
  #define use_olz(i) (((FutureState *)state)[(i)-in_offset].use_olz)
  #define olz_len(i) (((FutureState *)state)[(i)-in_offset].olz_len)
  #define use_olz2(i) (((FutureState *)state)[(i)-in_offset].use_olz2)
  #define olz_len2(i) (((FutureState *)state)[(i)-in_offset].olz_len2)

  // PastState accessors: `past_state` is indexed directly by position.
  #define same(i) (((PastState *)past_state)[(i)].same)
  #define samelen(i) (((PastState *)past_state)[(i)].samelen)
  #define sorted_len(i) (((PastState *)past_state)[(i)].sorted_len)
  #define sorted_prev(i) (((PastState *)past_state)[(i)].sorted_prev)
  #define sorted_next(i) (((PastState *)past_state)[(i)].sorted_next)
 112 | 
 /* Compiled only when _MSC_VER is defined: provides __builtin_clz() on top of
    the _BitScanReverse intrinsic. Returns 31 minus the bit index reported by
    _BitScanReverse, i.e. the count of leading zero bits, and 32 when value is
    0 (the intrinsic reports failure in that case).
    NOTE(review): _BitScanReverse is documented to take an `unsigned long *`;
    a uint32_t pointer is passed here - confirm this builds without a cast on
    the targeted MSVC versions. */
 113 | #ifdef _MSC_VER
 114 | #include <intrin.h>
 115 | uint32_t __inline __builtin_clz( uint32_t value )
 116 | {
 117 |     uint32_t leading_zero = 0;
 118 | 
 119 |     if ( _BitScanReverse( &leading_zero, value ) )
 120 |     {
 121 |        return 31 - leading_zero;
 122 |     }
 123 |     else
 124 |     {
 125 |          return 32;
 126 |     }
 127 | }
 128 | #endif
 129 | 
 130 | int in_offset = 0;
 131 | 
 132 | static inline int price_offset(int num,int total) {
 133 |   if (total<=256) return 8;//top=0;
 134 |   register int res=8;
 135 |   register int x=256;
 136 | 
 137 |   int top = lzlow(total);
 138 |   while (1) {
 139 |     x+=x;
 140 |     if (x>=total+top) break; /* only 1 bit to be outputted left */
 141 |       if (x & lzmagic)
 142 |         top=lzshift(top);
 143 |     //if (x>=breaklz) {
 144 |       if (num<top) { return res;}
 145 |       num+=top;
 146 |       total+=top;
 147 |       top+=top;
 148 |     //}
 149 |     res++;
 150 |   }
 151 |   if (num>=x-total) { res++;}
 152 |   return res;
 153 | }
 154 | 
 /*
  * Bit count for a repeat-match length value num (callers pass num >= 2).
  * Values below 4 take 2 bits; otherwise the result is REPLEN_SKEW plus twice
  * the index of the highest set bit of num - 2.
  */
 static inline int price_replen(int num) {//num>=2
   #define REPLEN_SKEW 1
   int top_bit;

   if (num < 4) {
     return 2; // 00 01
   }
   top_bit = 31 - __builtin_clz(num - 2);
   return REPLEN_SKEW + (top_bit << 1);
 }
 161 | 
 /*
  * Bit count for a match length value num (callers pass num >= 2).
  * Values below 4 take 2 bits; otherwise the result is LEN_SKEW plus twice
  * the index of the highest set bit of num - 2.
  */
 static inline int price_len(int num) {//num>=2
   #define LEN_SKEW 1
   int top_bit;

   if (num < 4) {
     return 2; // 00 01 10
   }
   top_bit = 31 - __builtin_clz(num - 2);
   return LEN_SKEW + (top_bit << 1);
 }
 168 | 
 169 | int lastpos;
 170 | unsigned int bit_cnt;
 171 | int outpos;
 172 | 
 173 | static inline void putbit(int bit) {
 174 |   bit_cnt>>=1;
 175 |   if (bit_cnt==0) {
 176 |     lastpos=outpos;
 177 |     *(unsigned long*)(out_buf+lastpos)=0;
 178 |     outpos+=4;
 179 |     bit_cnt=0x80000000;
 180 |   }
 181 |   if (bit) *(unsigned long *)(out_buf+lastpos)|=bit_cnt;
 182 | }
 183 | 
 /* Statistics counters. In the code visible around them: stolz is bumped in
    put_lz when a match reuses old_ofs; bitslzlen and bitsolzlen are bumped
    once per flag bit in put_lz; bitslen accumulates the bits emitted by
    putenc_l and bitsdist those accounted for by putenc. stlet, stlz and
    bitslit are not touched in this part of the file. */
 184 | int stlet=0;
 185 | int stlz=0;
 186 | int stolz=0;
 187 | int bitslzlen=0;
 188 | int bitsolzlen=0;
 189 | int bitslen=0;
 190 | int bitsdist=0;
 191 | int bitslit=0;
 192 | 
 /*
  * Emits the code for value num out of total possible values.
  *
  * num      value to encode; also dumped raw (4 bytes) to fdist when that
  *          stream is open.
  * total    number of possible values.
  * break_at the early-exit test (num < top) is only made once the doubling
  *          counter x has reached this value.
  * debug    when non-zero the bits are printed with printf; in that mode the
  *          leading byte is not stored and the remaining bits are not passed
  *          to putbit.
  *
  * The code is first laid out in bits[]: every slot is marked with the
  * placeholder 2, then the placeholders are filled from the last slot
  * backwards with the binary digits of num, least significant first. The
  * first 8 slots are packed into one byte stored at out_buf[outpos++]; any
  * further slots go through putbit(). bitsdist grows by the slot count.
  */
 193 | static inline void putenc(int num,int total, int break_at, int debug) {
 194 |   char bits[100];
 195 |   int res=0;
 196 |   int x=1;
 197 |   int obyte=0;
 198 |   if (fdist) fwrite(&num,1,4,fdist);
 /* obyte is set to 1 here and never changed, so the byte-packing branch
    below is always the one taken. */
 199 |   obyte=1;
 200 |   bits[0]=0;
 201 |   bits[1]=0;
 202 |   bits[2]=0;
 203 |   bits[3]=0;
 204 |   bits[4]=0;
 205 |   bits[5]=0;
 206 |   bits[6]=0;
 207 |   bits[7]=0;
 208 |   //if (debug) fprintf(stderr,"ofs=%d total=%d\n",num,total);
 209 | 
 210 |   int top=lzlow(total);
 211 |   //if (total<=256) top=0;
 /* One slot per doubling of x. The loop cannot end before x reaches 512, so
    it always produces at least 8 slots unless the early exit is taken. */
 212 |   while (1) {
 213 |     x+=x;
 214 |     if (x>=512&& x>=total+top) break; /* only 1 bit to be outputted left */
 215 |       if (x & lzmagic) 
 216 |         top=lzshift(top);
 217 |     if (x>=break_at) {
 218 |       if (num<top) {  goto doneit;}
 219 |       num+=top;
 220 |       total+=top;
 221 |       top+=top;
 222 |     }
 223 |     bits[res++]=2;
 224 |   }
 /* Final slot: values at or above x - total take the longer form. */
 225 |   x-=total;
 226 |   if (num>=x) {
 227 |     num+=x;
 228 |     bits[res++]=2;
 229 |   }
 230 | 
 231 | doneit: 
 /* NOTE(review): this padding loop increments res both in its body and in its
    header, so only every other slot becomes a placeholder, and when it starts
    at res == 7 it ends with res == 9 and bits[8] is then read without having
    been written. It only runs when res < 8 at this point - confirm whether
    that can happen for the break_at values in use and whether the double
    increment is intended. */
 232 |   for(;res<8;res++) {
 233 |     bits[res++]=2;
 234 |   }
 235 |   for(x=res-1;x>=0;x--) {
 236 |     if (bits[x]==2) {
 237 |       bits[x]=num&1;
 238 |       num>>=1;
 239 |     }
 240 |   }
 241 |   if (obyte) {
 242 |     //printf("res=%d\n", res);
 243 |     uint8_t b=0;
 244 |     for(x=0;x<8;x++) {
 245 |       if (debug) printf("%d",bits[x]);
 246 |       if (bits[x]) b|=128>>x;
 247 |     }
 248 |     if (debug) printf(" ");
 249 |     if (!debug) out_buf[outpos++]=b;
 250 |     for(;x<res;x++) {
 251 |       if (debug) printf("%d",bits[x]);
 252 |       if (!debug) putbit(bits[x]);
 253 |     }
 254 |   }
 255 |   else 
 256 |     for(x=0;x<res;x++) {
 257 |       if (debug) printf("%d",bits[x]);
 258 |       putbit(bits[x]);
 259 |   }
 260 |   bitsdist+=res;
 261 | }
 262 | 
 263 | static inline void putenc_l(int num) {
 264 |   char bits[100];
 265 |   int res=0;
 266 |   int x=1;
 267 |   int obyte=0;
 268 |   if (flen) fwrite(&num,1,4,flen);
 269 | 
 270 |   if (num==0) {bitslen+=2; putbit(0);putbit(0);return;}
 271 |   if (num==1) {bitslen+=2; putbit(0);putbit(1);return;}
 272 |   putbit(1);num-=2;bitslen++;
 273 |   x+=x;
 274 |   bits[res++]=2;
 275 | 
 276 |   while (1) {
 277 |     x+=x;
 278 |     if (num<(x>>1)) {bits[res++]=0; break;}
 279 |     bits[res++]=1;
 280 |     num-=x>>1;
 281 |     bits[res++]=2;
 282 |   }
 283 | 
 284 |   for(x=res-1;x>=0;x--) {
 285 |     if (bits[x]==2) {
 286 |       bits[x]=num&1;
 287 |       num>>=1;
 288 |     }
 289 |   }
 290 |   for(x=0;x<res;x++) {
 291 |     putbit(bits[x]);
 292 |   }
 293 |   bitslen+=res;
 294 | }
 295 | 
/* 0-based offset of the most recent match emitted with an explicit offset;
   put_lz() compares against it to decide whether the offset can be reused. */
int old_ofs=0;
/* Non-zero while the previously emitted item was a literal (or at block
   start); put_letter() increments it and put_lz() clears it. */
int was_letter=1;
 298 | 
 299 | void initout(int start) {
 300 |   outpos = 0;
 301 |   if (start==1) {
 302 |     out_buf[outpos++]=in_buf[0];
 303 |   }
 304 |   old_ofs=0;
 305 |   bit_cnt=1;
 306 |   was_letter=1;
 307 | }
 308 | 
 309 | static inline int Min(int a,int b) {
 310 |   return a<b? a:b;
 311 | }
 312 | 
 313 | static inline int Max(int a,int b) {
 314 |   return a>b? a:b;
 315 | }
 316 | 
 317 | static inline void put_lz(int offset,int length,int used) {
 318 | #ifdef EXPERIMENTS
 319 |   uint16_t code512 = 0x100;
 320 | #endif
 321 | 
 322 |   if (flzlit) fprintf(flzlit,"%c",1);
 323 |   putbit(1); bitslzlen++;
 324 |   offset=-offset; /* 1.. */
 325 |   offset--; /* 0.. */
 326 |   if (was_letter) { bitsolzlen++;
 327 |     was_letter=0;
 328 |     if (old_ofs==offset) {
 329 |       stolz++;
 330 |       if (folz) fprintf(folz,"%c",0);
 331 | 
 332 | #ifdef EXPERIMENTS
 333 | // test combining everything into one model for simple entropy coding
 334 |       code512 |= 0x80;
 335 |       if (length-MINOLEN < 0x7F) {
 336 |         code512 |= length-MINOLEN;
 337 |       } else {
 338 |         code512 |= 0x3F;
 339 |         length-=MINOLEN+0x7F;
 340 |         fwrite(&length, 4, 1, test2);
 341 |         length+=MINOLEN+0x7F;
 342 |       }
 343 |       code512 = (code512 & 0xFF) << 8 | (code512>>8);
 344 |       fwrite(&code512, 2, 1, test);
 345 | #endif
 346 | 
 347 |       putbit(0);
 348 |       putenc_l(length-MINOLEN);
 349 |       return;
 350 |     }
 351 |     if (folz) fprintf(folz,"%c",1);
 352 |     putbit(1);
 353 |   }
 354 |   length-=MINLZ;
 355 |   stlz++;
 356 |   if (offset+1>=longlen) { length--; }
 357 |   if (offset+1>=hugelen) { length--; }
 358 | 
 359 | #ifdef EXPERIMENTS
 360 |   if (length < 15) {
 361 |     code512 |= length;
 362 |   } else {
 363 |     code512 |= 15;
 364 |     length-=15;
 365 |     fwrite(&length, 4, 1, test2);
 366 |     length+=15;
 367 |   }
 368 |   code512 |= (offset & 0x7) << 4;
 369 |   code512 = (code512 & 0xFF) << 8 | (code512>>8);
 370 |   fwrite(&code512, 2, 1, test);
 371 |   uint tmpofs = offset >> 4;
 372 |   fwrite(&tmpofs, 4, 1, test3);
 373 | #endif
 374 |   
 375 |   putenc(offset,used,breaklz, 0);
 376 |   putenc_l(length-MINLZ+2);
 377 | 
 378 |   old_ofs=offset;
 379 | }
 380 | 
 381 | static inline void put_letter(uint8_t b) {
 382 | #ifdef EXPERIMENTS
 383 |   uint16_t code512 = b;
 384 |   code512 = (code512 & 0xFF) << 8 | (code512>>8);
 385 |   fwrite(&code512, 2, 1, test);
 386 | #endif
 387 | 
 388 |   if (flzlit) fprintf(flzlit,"%c",0);
 389 |   if (flit) fprintf(flit,"%c",b);
 390 |   putbit(0); bitslzlen++;
 391 |   out_buf[outpos++]=b; bitslit+=8;
 392 |   was_letter++;
 393 |   stlet++;
 394 | }
 395 | 
 396 | static inline int price_lz(int offset, int length, int used) { // offset>=1, length>=2, 
 397 |                                                     // if offset=>0xD00  length>=3
 398 |   int res=1; /* 1 bit = not a letter */
 399 |   if (offset>=longlen) { length--; }
 400 |   if (offset>=hugelen) { length--; }
 401 | 
 402 |   offset--; // 0.. 
 403 | 
 404 |   res+=price_offset(offset,used);
 405 |   res+=price_len(length-MINLZ+2);
 406 |   return res;
 407 | }
 408 | 
 409 | static inline int price_lzlen(int offset, int length, int used) { // offset>=1, length>=2, 
 410 |                                                     // if offset=>0xD00  length>=3
 411 |   int res=1; /* 1 bit = not a letter */
 412 |   if (offset>=longlen) { length--; }
 413 |   if (offset>=hugelen) { length--; }
 414 | 
 415 |   res+=price_len(length-MINLZ+2);
 416 |   return res;
 417 | }
 418 | 
 419 | static inline int price_replz_minus_lz(int offset, int length, int used) { // offset>=1, length>=2, 
 420 |                                                     // if offset=>0xD00  length>=3
 421 |   int res=2 /* lzlit flag, replz flag */ +price_replen(length+2-MINOLEN);
 422 |   return res-price_lz(offset,length,used);
 423 | }
 424 | 
 425 | static inline int cmpstr(int src,int src2) {
 426 |   int res=0;
 427 |   int b;
 428 | 
 429 |   for(;;) {
 430 |     if (in_buf[src]!=in_buf[src2]) return res;
 431 |     b=rle[src2];
 432 |     if (!b) return res;
 433 |     if (b>rle[src]) {return res+rle[src];}
 434 |     res+=b;
 435 |     src+=b;
 436 |     src2+=b;
 437 |   }
 438 |   return res;
 439 | }
 440 | 
 441 | int cmpstrsort(int *psrc,int *psrc2) {
 442 |   int b;
 443 |   int src = *psrc;
 444 |   int src2 = *psrc2;
 445 | //  printf("%d:%d:%d\n",src,src2,left);
 446 |   do {
 447 |     if (in_buf[src]<in_buf[src2]) return -1;
 448 |     if (in_buf[src]>in_buf[src2]) return 1;
 449 |     b=rle[src2];
 450 |     if (!b) return 1; // first string is longer
 451 |     if (b>rle[src]) b=rle[src];
 452 |     if (!b) return -1; // second string is longer
 453 |     src+=b;
 454 |     src2+=b;
 455 |   } while(1);
 456 | }
 457 | 
/*
 * Build the match-finding tables for in_buf[0..n):
 *   - suffix array `sorted` (via divsufsort) and, per position, links to the
 *     neighbouring suffixes in sorted order (sorted_prev / sorted_next) plus
 *     the common-prefix length with the previous suffix (sorted_len);
 *   - rle[i]: length of the run of equal bytes starting at i (rle[n] = 0);
 *   - same(i) / samelen(i): previous position starting with the same two
 *     bytes, and the match length against it.
 * `start` is not referenced in the body.
 */
void init_same(int start, int n) {
  int i;
  uint16_t bb;
  int run_len;
  int gen_same[256*256+256];

  /*
    Notes: the slowest parts here are PLCP array construction and divsufsort.
    On slower levels -7 .. -9 it does not matter.
    But on fast levels -1..-3 (that still provide good compression),
    initialization takes about 20-30% processing time.
    
    Also, it reprocesses whole history each time a new block is read, 
    which is clearly not optimal.
    
    possible optimizations:
    1. store SA for later reuse, do divsufsort for new block only, then
    merge them. not sure if it will be faster. still need to recalculate rlcp
    2. get rid of SA completely, construct suffix tree directly.
  */
  for(i=0;i<256+256*256;i++) gen_same[i] =0;	// for bucketA & bucketB
  divsufsort(in_buf,sorted,gen_same,n);
  // reuse sorted_prev for temp buffer
  // NOTE(review): the macro below aliases rle[], not sorted_prev; rle is
  // rebuilt further down after the PLCP pass has finished with it.
#define rank(i) rle[i]
  /* 
   calculate plcp in O(n) time
   see http://www.cs.ucr.edu/~stelo/cpm/cpm09/04_karkk.pdf
   http://www.mi.fu-berlin.de/wiki/pub/ABI/Sequence_analysi_2013/2004_ManziniTwo_Space_Saving_Tricks_for_Linear_Time_LCP_Array_Computation.pdf
  */
  /* rank(p) = position of the suffix that precedes p in sorted order */
  for(i=1;i<=n-1;i++) rank(sorted[i]) = sorted[i-1];
  rank(sorted[0]) = sorted[n-1];
  
  /* doubly linked list of positions in suffix order, -1 terminated */
  sorted_prev(sorted[0])=-1;
  for(i=1;i<n;i++) sorted_prev(sorted[i])=sorted[i-1];

  for(i=0;i<n-1;i++) sorted_next(sorted[i])=sorted[i+1];
  sorted_next(sorted[n-1])=-1;
  
  int h=0;
  for(i=0;i<=n-1;i++) {
    int j = rank(i);
    /* NOTE(review): the <=n bounds allow in_buf[n] to be compared, and that
       byte is only zeroed at the end of this function — confirm intended */
    while(i+h<=n && j+h<=n && in_buf[i+h]==in_buf[j+h]) h++;
    sorted_len(i) = h;
    /* common prefix too short to be a useful match: cut the link */
    if (h<=MINLZ) {
      sorted_prev(i)=-1;
      sorted_next(j)=-1;
    }
    if(h>0) h--;
  }

  /* run lengths, filled backwards; this overwrites the rank() scratch data */
  rle[n] = run_len = 0;
  /* NOTE(review): reads in_buf[-1] if n == 0 — presumably never called so */
  uint8_t b = in_buf[n-1];
  for(i=n-1;i>=0;i--) {
    if (in_buf[i]==b) 
      run_len++;
    else {
      b=in_buf[i];
      run_len = 1;
    }
    rle[i]=run_len;
  }

  bb=0;

  /* gen_same[] is reused as "last position seen" per 2-byte prefix */
  for(i=0;i<65536;i++) {gen_same[i]=-1; }
  for(i=0;i<n-1;i++) { 
    bb=in_buf[i]; bb<<=8; bb|=in_buf[i+1];
    same(i)=gen_same[bb]; 
    if (gen_same[bb]>=0) { samelen(i)=1+cmpstr(i+1,same(i)+1);}
    gen_same[bb]=i; 
  }
  same(i)=-1; /* i == n-1 here: the last byte has no 2-byte prefix */

  in_buf[n]=0;

  if (verbose) printf("init done.\n");
}
 535 | 
/*
 * CHECK_REPLZ - statement macro expanded inside pack() for a candidate match
 * of length `len` at earlier position `pos` (current position `used`).
 *
 * It reads the enclosing locals pos, len, used, left, pofs, j and the global
 * `level`, may update res and the my_* result variables, and declares its own
 * locals (k, jjj, d, tmp, olen), so it has to be expanded inside a block of
 * its own.
 *
 * For k = len+1 .. left-3 it prices "match, then k-len literals (9 bits
 * each), then another match reusing the same offset":
 *   - if the already computed best choice at used+k uses this very offset,
 *     its cost is re-priced as an offset reuse;
 *   - otherwise the reusable match length at used+k is measured with
 *     cmpstr() and every length from MINOLEN up is priced, including an
 *     optional second reuse after up to 8 further literals.
 * The scan stops after `level` positions whose best choice is a match.
 */
#define CHECK_REPLZ \
        int k;\
	int jjj;\
        int d=level;\
        int tmp=pofs+price_lzlen(used-pos,len,used);\
        int olen=0;\
        for(k=len+1;k<left-2;k++) {\
          tmp+=9;\
          if (best_ofs(used+k)==pos-used) {\
            int tmp2=tmp+price_replz_minus_lz(used-pos,best_len(used+k),used+k);\
            tmp2+=cache(used+k);\
            if (tmp2<res || (tmp2==res && my_use_olz && my_best_ofs<pos-used)) {\
              res=tmp2;\
              my_best_ofs=pos-used;\
              my_best_len=len;\
              my_use_olz=k-len;\
              my_olz_len=best_len(used+k);\
              my_use_olz2=use_olz(used+k);\
	      my_olz_len2=olz_len2(used+k);\
            }\
          }\
          if (olen==0) {\
            olen=cmpstr(used+k,pos+k);\
            for (j=MINOLEN;j<olen;j++) {\
              int tmp2=tmp+1+1+price_replen(j+2-MINOLEN);\
              tmp2+=cache(used+k+j);\
              if (tmp2<res || (tmp2==res && my_best_ofs<pos-used)) {\
                res=tmp2;\
                my_best_ofs=pos-used;\
                my_best_len=len;\
                my_use_olz=k-len;\
                my_olz_len=j;\
                my_use_olz2=0;\
              }\
            }\
	    if (olen>=MINOLEN) {\
              int tmp2=tmp+2+price_replen(olen+2-MINOLEN);\
              tmp2+=cache(used+k+olen);\
              if (best_len(used+k+olen)==1) {\
	        int jj;\
	        for(jj=1;jj<=8;jj++) {\
                  if (best_len(used+k+olen+jj)>1) {\
                    if (best_ofs(used+k+olen+jj)==pos-used) {\
                      tmp2+=price_replz_minus_lz(used-pos,best_len(used+k+olen+jj),used+k+olen+jj);\
		      break;\
                    }\
	          }\
                      int olen2=cmpstr(used+k+olen+jj,pos+k+olen+jj);\
            for (jjj=MINOLEN;jjj<=olen2;jjj++) {\
		     /* if (olen2>=MINOLEN) {*/\
		        int tmp3=-cache(used+k+olen);\
			tmp3+=jj*9+2+price_replen(jjj+2-MINOLEN);\
			tmp3+=cache(used+k+olen+jj+jjj);\
			if (tmp3<0) { tmp3+=tmp2;\
              if (tmp3<res || (tmp3==res && my_best_ofs<pos-used)) {\
                res=tmp3;\
                my_best_ofs=pos-used;\
                my_best_len=len;\
                my_use_olz=k-len;\
                my_olz_len=olen;\
                my_use_olz2=jj;\
                my_olz_len2=jjj;\
              }\
			}\
		      }\
	            break;\
	        }\
              }\
              if (tmp2<res || (tmp2==res && my_best_ofs<pos-used)) {\
                res=tmp2;\
                my_best_ofs=pos-used;\
                my_best_len=len;\
                my_use_olz=k-len;\
                my_olz_len=olen;\
                my_use_olz2=0;\
              }\
	    }\
          } else olen--;\
          if (best_ofs(used+k)) {\
            d--; if (d==0) break;\
          }\
        }
 618 | 
 619 | 
/*
 * Compress in_buf[start..n) into out_buf.
 *
 * Phase 1 walks the block backwards (i = n-2 .. start). For every position
 * it records the cheapest encoding found for the rest of the block:
 * cache(i) is the cost in bits, best_ofs(i)/best_len(i) the chosen item
 * (best_len == 1 means a literal), and use_olz/olz_len/use_olz2/olz_len2
 * describe up to two follow-up matches that reuse the same offset after a
 * few literals.
 * Phase 2 replays those choices from start to n through put_letter() and
 * put_lz().
 *
 * Returns 0 when n < start; returns n when the estimated packed size in
 * bytes is not smaller than n-start (nothing is emitted in that case);
 * otherwise returns the number of bytes written to out_buf (outpos).
 */
int pack(int start, int n) {
  int res;
  int i;

  if (n<start) { return 0; }

  init_same(start,n);
  /* seed the backward pass: the final byte is always a literal (9 bits) */
  cache(n-1)=9; /* last letter cannot be packed as a lz */
  best_ofs(n-1)=0;
  best_len(n-1)=1;
  use_olz(n-1)=0;
  /* NOTE(review): the next line repeats the previous one; possibly another
     per-position field was meant to be cleared here — confirm */
  use_olz(n-1)=0;

  /* unlink position n-1 from the suffix-order list so that only positions
     before the current one remain linked */
  if (sorted_prev(n-1)>=0) {
    sorted_next(sorted_prev(n-1))=sorted_next(n-1);
  }
  if (sorted_next(n-1)>=0) {
    sorted_len(sorted_next(n-1)) = Min(sorted_len(sorted_next(n-1)),
                                            sorted_len(n-1));
    sorted_prev(sorted_next(n-1))=sorted_prev(n-1);
  }

  for(i=n-2;i>=start;i--) {
    int used=i;     /* position being decided */
    int left=n-i;   /* bytes from here to the end of the block */
    int res;        /* best cost in bits found so far for this position */
    int pos;
    int max_match;
    int len;
    int j;

    int my_best_ofs=0;
    int my_best_len=1;
    int my_use_olz=0;
    int my_use_olz2=0;
    int my_olz_len=0;
    int my_olz_len2=0;
    int match_check_max;
    int notskip = 1; 

    /* baseline: code in_buf[used] as a literal (1 flag bit + 8 data bits) */
    res=9+cache(used+1);
    if (best_ofs(used+1)) {
      /* a match right after a literal costs one extra flag bit */
      res++;
      /* inside a run of equal bytes: try extending the next position's match
         backwards by one byte, keeping its offset */
      if (in_buf[used]==in_buf[used+1]) {
        if (in_buf[used]==in_buf[used-1]) {
          if (in_buf[used]==in_buf[used+best_ofs(used+1)]) {
            if ((best_len(used+1)>3)||(best_len(used+1)==3&&-best_ofs(used+1)<hugelen)|| (-best_ofs(used+1)<longlen)) {
              int tmp=cache(used+1)-price_lz(-best_ofs(used+1),best_len(used+1),used+1)
                  +price_lz(-best_ofs(used+1),best_len(used+1)+1,used);
              if (tmp<=res) {
                res=tmp;
                my_best_ofs=best_ofs(used+1);
                my_best_len=best_len(used+1)+1;
                my_use_olz=use_olz(used+1);
                my_olz_len=olz_len(used+1);
                my_use_olz2=use_olz2(used+1);
                my_olz_len2=olz_len2(used+1);
              }
              /* long enough: skip the match searches below */
              if (my_best_len>=5)
                notskip = 0;
            }
          }
        }
      }
    }

    /* The three loops below try a short match (length 2, 3, 4) that borrows
       the offset chosen k literals further on, so that the later match can
       be coded as an offset reuse.
       NOTE(review): they store olz_len(...) into my_olz_len2 whereas
       CHECK_REPLZ stores olz_len2(...) — confirm which is intended. */
    int k;
    for(k=1;k<4;k++)
      if (n-i>2+k && best_ofs(used+2+k) 
        && used+best_ofs(used+2+k) >= 0
        && -best_ofs(used+2+k) < longlen && best_ofs(used+2+k)!=best_ofs(used+1+k)) {
        if (in_buf[used]==in_buf[used+best_ofs(used+2+k)]) {
          if (in_buf[used+1]==in_buf[used+1+best_ofs(used+2+k)]) {
              int tmp=cache(used+2+k)+price_replz_minus_lz(-best_ofs(used+2+k),best_len(used+2+k),used+2+k)
                  +9*k+price_lz(-best_ofs(used+2+k),2,used);
	      if (tmp<=res) {
	        res=tmp;
                my_best_ofs=best_ofs(used+2+k);
                my_best_len=2;
                my_use_olz=k;
                my_olz_len=best_len(used+2+k);
                my_use_olz2=use_olz(used+2+k);
                my_olz_len2=olz_len(used+2+k);
	      }
          }
        }
      }

    for(k=1;k<4;k++)
      if (n-i>3+k && best_ofs(used+3+k) 
        && used+best_ofs(used+3+k) >= 0
        && -best_ofs(used+3+k) < hugelen && best_ofs(used+3+k)!=best_ofs(used+2+k)) {
        if (in_buf[used]==in_buf[used+best_ofs(used+3+k)]) {
          if (in_buf[used+1]==in_buf[used+1+best_ofs(used+3+k)]) {
            if (in_buf[used+2]==in_buf[used+2+best_ofs(used+3+k)]) {
              int tmp=cache(used+3+k)+price_replz_minus_lz(-best_ofs(used+3+k),best_len(used+3+k),used+3+k)
                  +9*k+price_lz(-best_ofs(used+3+k),3,used);
	      if (tmp<=res) {
	        res=tmp;
                my_best_ofs=best_ofs(used+3+k);
                my_best_len=3;
                my_use_olz=k;
                my_olz_len=best_len(used+3+k);
                my_use_olz2=use_olz(used+3+k);
                my_olz_len2=olz_len(used+3+k);
	      }
            }
          }
        }
      }

    for(k=1;k<4;k++)
      if (n-i>4+k && best_ofs(used+4+k) 
        && used+best_ofs(used+4+k) >= 0
        && best_ofs(used+4+k)!=best_ofs(used+3+k)) {
        if (in_buf[used]==in_buf[used+best_ofs(used+4+k)]) {
          if (in_buf[used+1]==in_buf[used+1+best_ofs(used+4+k)]) {
            if (in_buf[used+2]==in_buf[used+2+best_ofs(used+4+k)]) {
              if (in_buf[used+3]==in_buf[used+3+best_ofs(used+4+k)]) {
                int tmp=cache(used+4+k)+price_replz_minus_lz(-best_ofs(used+4+k),best_len(used+4+k),used+4+k)
                  +9*k+price_lz(-best_ofs(used+4+k),4,used);
	        if (tmp<=res) {
	          res=tmp;
                  my_best_ofs=best_ofs(used+4+k);
                  my_best_len=4;
                  my_use_olz=k;
                  my_olz_len=best_len(used+4+k);
                  my_use_olz2=use_olz(used+4+k);
                  my_olz_len2=olz_len(used+4+k);
	        }
              }
            }
          }
        }
      }
    /* nearest earlier position that starts with the same two bytes */
    pos=same(used);
    if (pos<0) goto done;
    if (!notskip) goto done;

    {
      len=samelen(used);
      /* ll = extra minimum length required for far offsets */
      int ll=(used-pos>=longlen)?1:0;
      if (used-pos>=hugelen) ll=2;
      int pofs = price_offset(used-pos-1,used);
      if (len<left && len>=2+ll) {
        CHECK_REPLZ
      }
      /* price every usable length of this match */
      for(j=MINLZ+ll;j<=len;j++) {
        int tmp=pofs+price_lzlen(used-pos,2-MINLZ+j,used);
        tmp+=cache(used+j);
        if (tmp<res) {
          res=tmp;
          my_best_ofs=pos-used;
          my_best_len=j;
          my_use_olz=0;
          my_olz_len=0;
        }
      }
      max_match=len;

    }
    if (max_match<MINLZ) max_match=MINLZ;
    /* follow the same() chain to older candidates; give up after
       short_match_level candidates that do not beat the longest match */
    match_check_max = short_match_level;
      for(;;) {
        int slen=samelen(pos);
        pos=same(pos);
        int ll=(used-pos>=longlen)?1:0;
        if (used-pos>=hugelen) ll=2;
        //if (used-pos>=longlen) break;
        if (pos<0) break;
        /* derive the match length against the new pos from the previous
           length and the chain link length; only compare bytes when equal */
        if (len>slen) {
          len=slen;
        } else if (len==slen) {
          len+=cmpstr(used+len,pos+len);
        } 
        int pofs = price_offset(used-pos-1,used);
        if (len<left && len>=2+ll) {
          CHECK_REPLZ
        }
        if (len>max_match) {
          for(j=Max(max_match+1,MINLZ+ll);j<=len;j++) {
            int tmp=pofs+price_lzlen(used-pos,j-MINLZ+2,used);
            tmp+=cache(used+j);
            if (tmp<res) {
              res=tmp;
              my_best_ofs=pos-used;
              my_best_len=j;
              my_use_olz=0;
              my_olz_len=0;
            }
          }
	  max_match=len;
        } else { match_check_max--; if (match_check_max <= 0) break; }
        
      }
    
    /* second search: walk outwards from `used` through its neighbours in
       suffix order, always taking the side with the longer common prefix */
    int top=sorted_prev(used);
    int bottom=sorted_next(used);
    int len_top=top >= 0 ? sorted_len(used) : 0;
    int len_bottom=bottom >= 0 ? sorted_len(bottom) : 0;

    match_check_max = match_level;
    int my_min_ofs=used+1;
    while (top>=0 || bottom >=0) {
      match_check_max--;
      if (match_check_max<=0) goto done;
      if (len_top>len_bottom) {
        pos=top;
	len=len_top;
        len_top = Min(len_top,top >= 0 ? sorted_len(top):0);
	top=sorted_prev(pos);
      } else {
        pos=bottom;
	len=len_bottom;
	bottom=sorted_next(pos);
        len_bottom = Min(len_bottom,bottom >= 0 ? sorted_len(bottom):0);
      }
      if (len<=MINLZ) goto done;
      if (len<=MINLZ+1 && used-pos>=hugelen) continue; // 
      int pofs = price_offset(used-pos-1,used);
      if (len<left) {
          CHECK_REPLZ
      }
      if (my_min_ofs>used-pos) {
        my_min_ofs=used-pos;//we are checking matches in decreasing order. we need to check next matches only if those are shorter
        int ll=(used-pos>=hugelen)?1:0;
        for(j=MINLZ+1+ll;j<=len;j++) {
          int tmp=pofs+price_lzlen(used-pos,j-MINLZ+2,used);
          tmp+=cache(used+j);
          if (tmp<res || (tmp==res && my_best_ofs<pos-used)) {
            res=tmp;
            my_best_ofs=pos-used;
            my_best_len=j;
            my_use_olz=0;
            my_olz_len=0;
          }
        }
      }

    }    

done:
    /* unlink `used` from the suffix-order list before moving to used-1 */
    if (sorted_prev(used)>=0) {
      sorted_next(sorted_prev(used))=sorted_next(used);
    }
    if (sorted_next(used)>=0) {
      sorted_len(sorted_next(used)) = Min(sorted_len(sorted_next(used)),
                                            sorted_len(used));
      sorted_prev(sorted_next(used))=sorted_prev(used);
    }

    /* record the decision for this position */
    best_ofs(used)=my_best_ofs;
    best_len(used)=my_best_len;
    use_olz(used)=my_use_olz;
    olz_len(used)=my_olz_len;
    use_olz2(used)=my_use_olz2;
    olz_len2(used)=my_olz_len2;
    cache(used)=res;

    if (verbose && (i&0xFFF)==0) {
      printf("\x0D%d left ",i-start);
      fflush(stdout);
    }
  }

  /* estimated size: total bits plus 8, rounded up to whole bytes */
  res=8+cache(start);
  if (verbose) printf("\nres=%d\n",res);
  res+=7;
  res>>=3;
  if (verbose) printf("res bytes=%d\n",res);
  /* not smaller than the raw block: report n without emitting anything */
  if (res>=n-start) {
    return n;
  };

  /* now we can easily generate compressed stream */
  initout(start);
  for(i=start;i<n;) {
    if (best_len(i)==1) {
      put_letter(in_buf[i]); i++;
    } else {
      int k,ofs,len,k2,len2;
dolz:
//      printf("do_lz %d:%d,left=%d\n",best_ofs[i],best_len[i],n-i);
      put_lz(best_ofs(i),best_len(i),i);
      /* remember the planned follow-ups before advancing i */
      ofs=best_ofs(i);
      len=olz_len(i);
      len2=olz_len2(i);
      k=use_olz(i);
      k2=use_olz2(i);
      i+=best_len(i);
      if (k>0) {
        /* k literals, then a match reusing the same offset */
        for(;k>0;k--) put_letter(in_buf[i++]);
        /* if the position's own decision is the same match with follow-ups
           of its own, restart from it so they are emitted as well */
        if ((use_olz(i))&&(len==best_len(i))&&(ofs==best_ofs(i))) goto dolz;
//        printf("put_lz %d:%d,left=%d\n",ofs,len,n-i);
        put_lz(ofs,len,i);
        i+=len;
      if (k2>0) {
        for(;k2>0;k2--) put_letter(in_buf[i++]);
        if ((use_olz(i))&&(len2==best_len(i))&&(ofs==best_ofs(i))) goto dolz;
//        printf("put_lz %d:%d,left=%d\n",ofs,len,n-i);
        put_lz(ofs,len2,i);
        i+=len2;
	
      }
	
      }
    }
  }
  if (verbose) printf("out bytes=%d\n",outpos);
  return outpos;
}
 931 | 
 932 | int main(int argc,char *argv[]) {
 933 |   FILE *ifd,*ofd;
 934 |   int n,i,bres,blz;
 935 |   uint8_t b;
 936 | 
 937 |   if (argc<3) {
 938 |     // note: -d0 (32k history) does not work right now
 939 |     printf("usage: lzoma [OPTION] input output [lzlit lit olz len dist]\n"
 940 |            "\t-1 .. -9 Compression level (default 7)\n"
 941 |            "\t-d[1..15] History size (default 9: 16M history; compression currently requires about 30x*history RAM)\n"
 942 |            "\t-v Be verbose\n"
 943 |            );
 944 |     printf("Notice: this program is at experimental stage of development. Compression format is not stable yet.\n");
 945 |     if (argc>1 && argv[1][0]=='%') { // undocumented debug feature to check correctness of offset encoding, when tuning parameters in lzoma.h
 946 |       int i;
 947 |       int total=atoi(argv[1]+1);//16*1024*1024;
 948 |       printf("%d\n",total);
 949 |       for(i=total-10;i<total;i++) {
 950 |         printf("%04d:",i);
 951 |         putenc(i, total,breaklz, 1);
 952 |         printf("\n");
 953 |       }
 954 |     }
 955 |     exit(0);
 956 |   }
 957 |   int arg=1;
 958 |   int metalevel = 7;
 959 |   int dict_size=9;
 960 |   while (arg<argc && argv[arg][0]=='-') {
 961 |     if (argv[arg][1]>='1' && argv[arg][1]<='9')
 962 |       metalevel = argv[arg][1]-'0';
 963 |     if (argv[arg][1]=='v')
 964 |       verbose = 1;
 965 |     if (argv[arg][1]=='d') {
 966 |       dict_size = atoi(argv[arg]+2);
 967 |       if (dict_size <1) dict_size=1; 
 968 |       if (dict_size >15) dict_size=15; 
 969 |     }
 970 |     arg++;
 971 |   }
 972 |   history_size=HISTORY_SIZE(dict_size);
 973 |   block_size=BLOCK_SIZE(dict_size);
 974 |   metalevel--;
 975 |   level=levels[metalevel][0];
 976 |   short_match_level=levels[metalevel][1];
 977 |   match_level=levels[metalevel][2];
 978 |   in_buf = (void *)malloc(history_size * sizeof(uint8_t)+1);
 979 |   rle = (void *)malloc(history_size * sizeof(uint32_t));
 980 |   state = (void *)malloc(Max(block_size * sizeof(FutureState), history_size * sizeof(uint32_t)));
 981 |   past_state = (void *)malloc(history_size * sizeof(PastState));
 982 |   char *inf=argv[arg++];
 983 |   char *ouf=argv[arg++];
 984 |   ifd=fopen(inf,"rb");
 985 |   ofd=fopen(ouf,"wb");
 986 |   if (arg<argc) flzlit=fopen(argv[arg++],"wb");
 987 |   if (arg<argc) flit=fopen(argv[arg++],"wb");
 988 |   if (arg<argc) folz=fopen(argv[arg++],"wb");
 989 |   if (arg<argc) flen=fopen(argv[arg++],"wb");
 990 |   if (arg<argc) fdist=fopen(argv[arg++],"wb");
 991 | 
 992 | #ifdef EXPERIMENTS
 993 |   test=fopen("test","wb");
 994 |   test2=fopen("test2","wb");
 995 |   test3=fopen("test3","wb");
 996 | #endif
 997 |   
 998 |   int blocknum;
 999 |   uint32_t blk;
1000 |   for(blocknum=0;;blocknum++) {
1001 |     if (in_offset>history_size-block_size) {
1002 |       memmove(in_buf, in_buf+block_size, history_size-block_size);
1003 |       in_offset -= block_size;
1004 |     }
1005 |     n=fread(in_buf+in_offset,1,block_size,ifd);
1006 |     if (n<=0) {
1007 |       blk = BLOCK_STORED | BLOCK_LAST;
1008 |       fwrite(&blk,4,1,ofd);
1009 |       break;
1010 |     }
1011 |     if (verbose) printf("got %d bytes, packing...\n",n);
1012 |     if (blocknum==0) {
1013 |       /*
1014 |       int b1=cnt_bpes(in_buf,n);
1015 |       int use_e8=1;
1016 |       e8(in_buf, n);
1017 |       int b2=cnt_bpes(in_buf,n);
1018 |       printf("stats noe8 %d e8 %d\n",b1,b2);
1019 |       if (b2<=b1) {
1020 |         use_e8=0;
1021 |         printf("reverted e8\n");
1022 | 
1023 |         e8back(in_buf,n);
1024 |       }
1025 |       */
1026 |       /* 
1027 |          write compressed file header 
1028 |          we do it here only after we read some data
1029 |          TODO:
1030 |            at this stage we should decide if we will use any file-level compression filters
1031 |       */
1032 |       uint8_t header[8];
1033 |       header[0] = AuthorID >> 8;
1034 |       header[1] = AuthorID & 0xFF;
1035 |       header[2] = AlgoID[0];
1036 |       header[3] = AlgoID[1];
1037 |       header[4] = AlgoID[2];
1038 |       header[5] = AlgoID[3];
1039 |       header[6] = Version;
1040 |       int flags=0;
1041 |       header[7] = flags << 4 | dict_size; 
1042 |       fwrite(header,8,1,ofd);
1043 | 
1044 |       bres=pack(1,n);
1045 |     } else { // next blocks
1046 |       bres=pack(in_offset,in_offset+n);
1047 |     }
1048 |     uint32_t blk = (n < block_size) ? BLOCK_LAST : 0;
1049 |     if (bres==n) {
1050 |       blk |= BLOCK_STORED;
1051 |       blk |= n;
1052 |       fwrite(&blk,4,1,ofd);
1053 |       fwrite(in_buf+in_offset,1,n,ofd);
1054 |     } else {
1055 |       blk |= bres;
1056 |       fwrite(&blk,4,1,ofd);
1057 |       if (blk & BLOCK_LAST)
1058 |         fwrite(&n,4,1,ofd);
1059 |       fwrite(out_buf,1,bres,ofd);
1060 |       if (blk & BLOCK_LAST)
1061 |         break;
1062 |     }
1063 |     
1064 |     in_offset += n;
1065 |   }
1066 |   if (verbose) printf("closing files let=%d lz=%d olz=%d\n",stlet,stlz,stolz);
1067 |   if (verbose) printf("bits lzlit=%d let=%d olz=%d match=%d len=%d\n",bitslzlen,bitslit,bitsolzlen,bitsdist,bitslen);
1068 |   fclose(ifd);
1069 |   fclose(ofd);
1070 | 
1071 | #ifdef EXPERIMENTS
1072 |   fclose(test);
1073 |   fclose(test2);
1074 |   fclose(test3);
1075 | #endif
1076 | 
1077 |   return 0;
1078 | }
1079 | 


--------------------------------------------------------------------------------
/readme.MSVC:
--------------------------------------------------------------------------------
 1 | To compile with MSVC:
 2 | 
 3 | Open Developer Command Prompt and type:
 4 | cl pack.c divsufsort.c
 5 | cl unpack.c
 6 | 
 7 | Note: MSVC support is currently untested and probably has bugs. Prefer gcc or mingw if possible.
 8 | 
 9 | 
10 | 


--------------------------------------------------------------------------------
/readme.txt:
--------------------------------------------------------------------------------
  1 | Experimental packer based on a new compression algorithm (LZOMA).
  2 | (C)2015-2016 Alexandr Efimov <alef@webzavod.ru>
  3 | 
  4 | This code can be redistributed under the GPL Version 2 license.
  5 | For commercial licenses or support please contact author.
  6 | 
  7 | Project goals:
  8 |   extremely fast in-place decompression (similar to LZO)
  9 |   but with high compression ratio (much better than LZO, GZIP, BZIP2)
 10 | 
 11 | Current results:
 12 | 
 13 | Compression ratio is much higher than gzip. And much much higher than LZO.
 14 | Decompression speed is similar to UCL (a bit slower than LZO, faster than
 15 | gzip, bzip2 etc).
 16 | Decompressor code length is less than 300 bytes.
 17 | Has special filter for x86 code.
 18 | Decompression can be done in-place and does not require additional memory.
 19 | 
 20 | Overall, the results are very good for "compress once, unpack often" tasks like
 21 | linux kernel & ramdisk, readonly compressed filesystems.
 22 | 
 23 | Comparison with other compression software:
 24 | Nearest competitors are zstd, brotli.
 25 | Other compressors/archivers either decompress much slower or have a much worse compression ratio.
 26 | 
 27 | Compression ratio on binary files (without effect of e8e8 filter),
 28 | from best to worst:
 29 | brotli, lzoma, zstd
 30 | 
 31 | Compression ratio on text files:
 32 | brotli, zstd, lzoma
 33 | 
 34 | Decompressor code size:
 35 | lzoma, zstd, brotli
 36 | 
 37 | Decompression speed, on x86-64:
 38 | zstd is about 2x faster, lzoma and brotli have similar speed.
 39 | 
 40 | Decompression speed, on Intel Atom tablet:
 41 | zstd and lzoma have similar speed, brotli is 4x slower.
 42 | 
 43 | Algorithm description.
 44 | 
 45 | Compressed format has some features similar to both LZO and LZMA.
 46 | Does not use range coding.
 47 | Special bit added to matches that follow literals, indicating to re-use
 48 | previous offset instead of always storing the offset for each match. 
 49 | This allows patterns like abcdEabcdFabcdGabc to be compressed more efficiently, as
 50 | the offset is stored only for the first match.
 51 | 
 52 | This idea allows much higher compression than classical LZ algorithms, but the
 53 | compressor is much more complicated.
 54 | 
 55 | Compressed data format:
 56 | literal, item, ... item
 57 | 
 58 | Where:
 59 | literal is uncompressed byte aligned at byte boundary
 60 | item is:
 61 | 1 bit flag (literal | match)
 62 | if flag is literal then literal follows
 63 | 
 64 | if flag is match then
 65 |   if previous item was literal
 66 |     1 bit flag==1: use previous offset for match
 67 |   if the previous offset is not used for the match
 68 |     offset (encoded)
 69 |   len (encoded)
 70 | 
 71 | Notes:
 72 | 
 73 | Algorithm is still experimental, compressed format is not final yet.
 74 | 
 75 | File format (WIP, not implemented yet):
 76 | 1. Header
 77 | uint8_t[2] AuthorID 0xA1, 0xEF // this goes before AlgoID to avoid possible signature conflict with other LZ compressors
 78 | uint8_t[4] AlgoID 'L','Z','O','M'
 79 | uint8_t Version 0x00
 80 | uint8_t HistorySize (low 4 bits) || Flags
 81 |         where HistorySize is
 82 |         0: 32k
 83 |         1: 64k
 84 |         2: 128k
 85 |         3: 256k
 86 |         4: 512k
 87 |         5: 1M
 88 |         6: 2M
 89 |         7: 4M
 90 |         8: 8M
 91 |         9: 16M
 92 |         10:32M
 93 |         11:64M
 94 |         12:128M
 95 |         13:256M
 96 |         14:512M
 97 |         15: 1G
 98 |         BlockSize = HistorySize / 16
 99 | 
100 |         Flags:
101 |         0x10 - use filters, 1 byte filter type follows
102 |                0x00 - x86
103 |                0x01 - x86-64
104 |                0x02 - arm
105 |                0x03 - mips
106 |                0x04 - 0xF - reserved
107 |                0x10 - use delta filter
108 |                0x20 - text/xml filter
109 |                0x40 - reserved
110 |                0x80 - reserved
111 |         0x20 - encrypted file
112 |                TODO: some compression header follows
113 |         0x40 - digitally signed file (signature follows at the end of file)
114 |         0x80 - reserved
115 | 
116 | 2. Blocks
117 | Each block header is 4 bytes or more:
118 | high bits masks:
119 | 0x80000000 - if set, it is a stored block
120 | 0x40000000 - last block, 4 byte unpacked length follows unless it is a stored block
121 |              if not set, unpacked length assumed to be BLOCK_SIZE
122 | 0x20000000 - reserved
123 | 0x10000000 - reserved
124 | low 28 bits = packed length up to 2^28, can be zero
125 | 
126 | 3. uint32_t CRC
127 | 
128 | 


--------------------------------------------------------------------------------
/unpack.c:
--------------------------------------------------------------------------------
  1 | // test file decompression using LZOMA algorithm
  2 | // (c) Alexandr Efimov, 2015-2016
  3 | // License: GPL v2 or later
  4 | 
  5 | #include <stdio.h>
  6 | #include <stdlib.h>
  7 | #include <fcntl.h>
  8 | #ifndef _MSC_VER
  9 | #include <unistd.h>
 10 | #endif
 11 | #include <stdint.h>
 12 | #include <string.h>
 13 | //#include <x86intrin.h>
 14 | 
 15 | #ifndef O_BINARY
 16 |   #ifdef _O_BINARY
 17 |     #define O_BINARY _O_BINARY
 18 |   #else
 19 |     #define O_BINARY 0
 20 |   #endif
 21 | #endif
 22 | 
 23 | #include "lzoma.h"
 24 | 
 25 | uint8_t *in_buf; /* text to be decoded */
 26 | uint8_t *out_buf;/* decoded text + history */
 27 | 
 28 | // getbit: yields the next bit (0/1) from the shift register `bits`, whose lowest set bit is a sentinel; when only the sentinel is left it reloads 4 bytes from src (host byte order) via memcpy, because src advances bytewise and is not 4-byte aligned. Uses the caller's bits, resbits and src.
 29 | #define getbit ((bits=bits&0x7fffffff? (resbits=bits,bits+bits) :  (memcpy(&resbits,src,4),src+=4,(resbits<<1)+1)),resbits>>31)
 30 | 
 31 | #define getcode(bits, src, ptotal) { /* Decodes one variable-length offset code into the caller's `ofs`. ptotal sizes the code range (the caller passes history bytes + bytes produced so far). Reads one whole byte from src, then extra bits through getbit. */\
 32 |   int total = (ptotal);\
 33 |   ofs=0; /* the result accumulates in the caller's ofs */\
 34 |   long int res=0;\
 35 |   int x=256;\
 36 |   int top=0;\
 37 |   top=lzlow(total); /* initial threshold; lzlow, lzmagic and lzshift come from lzoma.h */\
 38 |   res=*src++; /* the first 8 bits of the code are a byte-aligned byte */\
 39 | \
 40 |   while (1) {\
 41 |     x+=x; /* x doubles every round: 512, 1024, ... */\
 42 |     if (x>=total+top) break; /* range exhausted: finish with the tail code below */\
 43 |     if (x & lzmagic)\
 44 |       top=lzshift(top);\
 45 |     if (res<top) {  goto getcode_doneit;} /* short code: the bits read so far are the whole value */\
 46 |     ofs-=top; /* skip the `top` values covered by shorter codes */\
 47 |     total+=top;\
 48 |     top+=top;\
 49 |     res+=res+getbit; /* append one more bit to res */\
 50 |   }\
 51 |   x-=total;\
 52 |   if (res>=x) { /* tail: values below x need no extra bit, the rest take one more (looks like a truncated-binary code - TODO confirm) */ \
 53 |     res+=res+getbit;\
 54 |     res-=x;\
 55 |   }\
 56 | getcode_doneit: \
 57 |   ofs+=res;\
 58 | }
 59 | 
 60 | #define getlen(bits, src) { /* Adds a variable-length match-length code to the caller's `len`, which already holds the base length. Bits are read through getbit. */\
 61 |   long int res=0;\
 62 |   \
 63 |   if (getbit==0) { /* short form "0 b": adds b (0 or 1) */\
 64 |     len+=getbit;\
 65 |     goto getlen_0bit;\
 66 |   }\
 67 |   len+=2; /* long form adds at least 2 */\
 68 |   while (1) { /* pairs of (data bit, continue bit); a continue bit of 1 also adds 1 to res before the next doubling */ \
 69 |     res+=res+getbit;\
 70 |     if (getbit==0) break;\
 71 |     res++;\
 72 |   }\
 73 |   len+=res;\
 74 | getlen_0bit: ;\
 75 | }
 76 | 
 77 | static void unpack_c(int current_history_size, int history_size, uint8_t *src, uint8_t *dst, uint8_t *start, int left) { // Decodes one compressed block from src to dst, producing `left` bytes. start is the base of the circular history buffer; history_size-1 is used as an index mask, so history_size is assumed to be a power of two. current_history_size is the amount of earlier output that matches may reference (0 for the first block).
 78 |   int ofs=-1; // match offset relative to dst, stored negative; kept across items so a later match can reuse it
 79 |   int len; // match length; -1 marks "the previous item was a literal"
 80 |   uint32_t bits=0x80000000; // bit buffer for getbit: only the sentinel is set, so the first getbit loads a word from src
 81 |   uint32_t resbits; // scratch word used by getbit
 82 |   left--; // from here on left is "bytes still to produce, minus one", so left<0 means done
 83 |   history_size--;// becomes mask for circular buffer indexing
 84 |   if (current_history_size) { // not the first block: there is no leading raw literal, decoding starts with a flag bit
 85 |     current_history_size-=dst-start; // afterwards dst-start+current_history_size equals history bytes + bytes produced so far in this block
 86 |     goto nextblock;
 87 |   }
 88 | 
 89 | copyletter: // literal: one raw byte copied from the input
 90 |   *dst++=*src++;
 91 |   left--;
 92 | nextblock:
 93 |   len=-1;
 94 | 
 95 | get_bit:
 96 |   if (left<0) return;
 97 |   if (getbit==0) goto copyletter; // item flag: 0 = literal, 1 = match
 98 | 
 99 |   /* unpack lz */
100 |   if (len<0) { // only directly after a literal: one extra bit selects offset reuse
101 |     len=1; // base length when the previous offset is reused
102 |     if (!getbit) { // 0 = reuse the previous offset
103 |       goto uselastofs;
104 |     }
105 |   }
106 |   len=2; // base length for a match with a newly coded offset
107 |   getcode(bits,src,dst-start+current_history_size); // sets ofs from the stream
108 |   ofs++;
109 |   if (ofs>=longlen) len++; // farther matches start from a larger base length
110 |   if (ofs>=hugelen) len++;
111 |   ofs=-ofs; // stored negative: the match source lies behind dst
112 | uselastofs:
113 |   getlen(bits,src); // adds the coded extra length to len
114 |   left-=len; // NOTE(review): len is not checked against the remaining output space before the copy below
115 | 
116 |   int ptr = dst-start+ofs; // index of the match source; the mask below wraps it into the circular buffer
117 |   do { // byte-by-byte so that a match may overlap the bytes it is producing
118 |     *dst=start[ptr&(history_size)];
119 |     ptr++;
120 |     dst++;
121 |   } while(--len);
122 |   goto get_bit; // len is 0 here, so the next match reads no reuse-offset flag
123 | }
124 | 
125 | #ifdef ASM_X86
126 | extern unsigned int unpack_x86(uint8_t *src, uint8_t *dst, int left);
127 | #endif
128 | 
129 | #include "e8.h"
130 | int main(int argc,char * argv[]) {
131 |   // Unpacks the LZOMA container argv[1] into the file argv[2].
132 |   // Returns 0 on success and 1 on any format, I/O or allocation error.
133 |   int ifd=-1,ofd=-1,rc=1;
134 |   int n,n_unp;
135 |   if (argc<3) {
136 |     printf("usage: unpack input output\n  Unpacks file packed using lzoma algoritm\n");
137 |     printf("Notice: this program is at experimental stage of development. Compression format is not stable yet.\n");
138 |     exit(0);
139 |   }
140 | 
141 |   ifd=open(argv[1],O_RDONLY|O_BINARY);
142 |   if (ifd<0) { perror(argv[1]); goto done; } // do not truncate the output when there is nothing to unpack
143 |   ofd=open(argv[2],O_WRONLY|O_TRUNC|O_CREAT|O_BINARY,511);
144 |   if (ofd<0) { perror(argv[2]); goto done; }
145 |   int current_history = 0; // bytes of earlier output that matches may reference
146 |   int ofs = 0;             // write position inside the circular out_buf
147 |   uint8_t header[8];
148 |   if (read(ifd,header,8)!=8 ||
149 |       header[0] != (AuthorID >> 8) ||
150 |       header[1] != (AuthorID & 0xFF) ||
151 |       header[2] != AlgoID[0] ||
152 |       header[3] != AlgoID[1] ||
153 |       header[4] != AlgoID[2] ||
154 |       header[5] != AlgoID[3] ||
155 |       header[6] != Version) {
156 |     fprintf(stderr, "Unsupported compressed data format\n");
157 |     goto done;
158 |   }
159 |   int dict_size = header[7] & 0xF;
160 |   int history_size = HISTORY_SIZE(dict_size);
161 |   int block_size = BLOCK_SIZE(dict_size);
162 |   in_buf = (uint8_t *)malloc(block_size);
163 |   out_buf = (uint8_t *)malloc(history_size); // history is 16*block_size
164 |   if (!in_buf || !out_buf) { fprintf(stderr, "Out of memory\n"); goto done; }
165 | 
166 |   uint32_t blk;
167 |   while(read(ifd,&blk,4)==4) {
168 |     n = blk & (block_size-1); // packed length of this block
169 |     if (blk & BLOCK_STORED) {
170 |       n_unp = n;
171 |     } else if (blk & BLOCK_LAST) {
172 |       if (read(ifd,&n_unp,4)!=4) goto bad_input;
173 |     } else {
174 |       n_unp = block_size;
175 |     }
176 |     // The unpacked length comes from the file: reject values that would run past the end of out_buf.
177 |     if (n_unp<0 || n_unp>history_size-ofs) goto bad_input;
178 |     if (n == n_unp) {
179 |       // Stored block: it must land at the current history position, the same place it is written from.
180 |       if (read(ifd,out_buf+ofs,n_unp)!=n_unp) goto bad_input;
181 |     } else {
182 |       if (read(ifd,in_buf,n)!=n) goto bad_input;
183 | #ifdef ASM_X86
184 | #error Asm version not yet updated for recent format changes. Please use C version right now.
185 |       unpack_x86(in_buf, out_buf, n_unp);
186 | #else
187 |       unpack_c(current_history, history_size, in_buf, out_buf+ofs, out_buf, n_unp);
188 | #endif
189 |     }
190 |     // TODO: the e8 filter from e8.h is not applied to unpacked blocks yet.
191 |     if (write(ofd,out_buf+ofs,n_unp)!=n_unp) { perror(argv[2]); goto done; }
192 |     if (blk & BLOCK_LAST)
193 |       break;
194 |     ofs+=n_unp;
195 |     ofs &= (history_size-1);
196 |     current_history += n_unp;
197 |     if (current_history > history_size-block_size)
198 |       current_history = history_size-block_size;
199 |   }
200 |   rc=0;
201 |   goto done;
202 | bad_input:
203 |   fprintf(stderr, "Truncated or corrupt compressed data\n");
204 | done:
205 |   free(in_buf);
206 |   free(out_buf);
207 |   if (ifd>=0) close(ifd);
208 |   if (ofd>=0 && close(ofd)!=0) rc=1; // a failed close can mean lost output
209 |   return rc;
210 | }
211 | 


--------------------------------------------------------------------------------
/unpack_lzoma.S:
--------------------------------------------------------------------------------
  1 | 	.file	"unpack_lzoma.S"
  2 | 	.section	.text.unlikely,"ax",@progbits
  3 | 	.text
  4 | 	.align 16
  5 | 	.globl	unpack_x86
  6 | 	.type	unpack_x86, @function
  7 | unpack_x86: # unsigned int unpack_x86(uint8_t *src, uint8_t *dst, int left); arguments are read from the stack. NOTE: unpack.c refuses to build with ASM_X86 because this routine was not updated for the current format.
  8 | 	pushl	%ebp
  9 | 	pushl	%edi
 10 | 	pushl	%esi
 11 | 	pushl	%ebx
 12 | 	movl	28(%esp), %edx # uncompressed bytes num
 13 | 	movl	20(%esp), %esi # input buffer
 14 | 	decl	%edx
 15 | 	movl	24(%esp), %edi # edi = output buffer
 16 |         pushl   %edi # save output buffer start in stack
 17 | 	xorl	%ebp, %ebp
 18 | 	incl    %ebp # ebp = 1: initial match distance (the C version keeps this as ofs = -1)
 19 | 	movl    $0x80000000,%eax # eax = bit buffer; only the sentinel bit is set, so the first bit read reloads it with lodsl
 20 | .copyletter:
 21 | 	movsb
 22 | 	#movb	(%esi), %cl   # cl = *src
 23 | 	#inc     %esi          # src++
 24 | 	#movb	%cl, (%edi)   # *dst = cl
 25 | 	#inc     %edi          # dst++
 26 | 	orl     $-1, %ebx     # ebx = len = -1
 27 | 	decl	%edx    # left--
 28 | 	js	.unpack_ret
 29 | .checkleft: 
 30 | 	addl    %eax,%eax
 31 | 	jnz	.nonextbit
 32 | 	lodsl
 33 | 	adcl    %eax,%eax
 34 | .nonextbit:
 35 | .checkifletterorlz:
 36 | 	jnc	.copyletter
 37 | .unpack_lz:
 38 | 	incl	%ebx
 39 | 	push %edx # save left
 40 | 	jnz	.load_ofs
 41 | 	addl    %eax,%eax
 42 | 	jnz	.L9
 43 | 	lodsl
 44 | 	adcl    %eax,%eax
 45 | .L9:
 46 | 	# ebx==0 at this point
 47 | 	jnc	.load_len
 48 | .load_ofs:
 49 | 	push %edi # save dst, we need edi as temp register
 50 | 	xor     %ebp,%ebp # ofs=0
 51 | 	movzxb	(%esi), %ecx # res=*src
 52 | 	mov	$512, %ebx # x=512 (the C version starts at 256 and doubles on loop entry)
 53 | 	subl	8(%esp), %edi # here (%esp) is pushed edi, 4(%esp) is left
 54 |         inc     %ebp # ofs++
 55 | 	inc     %esi
 56 | 	#movl	$48, %edx
 57 | 	lea     47(%ebp),%edx # top=48 (ebp is 1 here); 60 or 80 for smaller totals, see below
 58 | 	cmp     $652630,%edi
 59 | 	ja      .low
 60 | 	mov     $60,%dl
 61 | 	cmp     $49549,%edi
 62 | 	ja      .low
 63 | 	mov     $80,%dl
 64 | .low:
 65 | .loop_ofs:
 66 | 	addl    %edi,%edx # top+=total
 67 | 	cmpl    %edx,%ebx # cmp total+top,x
 68 | 	jns     .ofs_last_bit
 69 | 	subl    %edi,%edx # top-=total
 70 | 	test    $0x055ffc00,%ebx # rounds of x at which top is scaled by 9/8
 71 | 	jz      .noshift
 72 | 	leal    (%edx,%edx,8),%edx # top*=9
 73 | 	shrl    $3,%edx  # top>>=3
 74 | .noshift:
 75 | 	cmp     %edx,%ecx # cmp top,res
 76 | 	jl      .ofs_final_calc # if res<top
 77 | 	subl    %edx,%ebp # ofs-=top
 78 | 	addl    %edx,%edi # total+=top
 79 | 	addl    %edx,%edx # top+=top
 80 | 	addl    %eax,%eax
 81 | 	jnz	.L18
 82 | 	lodsl
 83 | 	adcl    %eax,%eax
 84 | .L18:
 85 | 	adcl    %ecx,%ecx
 86 | 	addl	%ebx, %ebx # x+=x
 87 | 	jmp	.loop_ofs
 88 | 
 89 | .ofs_last_bit:
 90 | 	subl    %edi,%ebx # x-=total
 91 | 	cmpl	%ebx, %ecx # cmp x,res
 92 | 	jl	.ofs_final_calc # if res<x goto final
 93 | 	addl    %eax,%eax
 94 | 	jnz	.L21
 95 | 	lodsl
 96 | 	adcl    %eax,%eax
 97 | .L21:
 98 | 	adcl    %ecx,%ecx
 99 | 	subl    %ebx,%ecx # res-=x
100 | .ofs_final_calc:
101 | 	popl    %edi # restore dst
102 | 	addl	%ecx, %ebp # ofs+=res
103 | 	cmpl	$5400, %ebp
104 | 	sbb     %ebx,%ebx # ebx = -1 if ofs<5400, else 0
105 | 	cmpl	$0x60000, %ebp
106 | 	sbb     $-3,%ebx # ebx = base length: 1 below 5400, 2 below 0x60000, 3 otherwise
107 | .load_len:
108 | 	addl    %eax,%eax
109 | 	jnz	.len2b
110 | 	lodsl
111 | 	adcl    %eax,%eax
112 | .len2b:
113 | 	jc	.lenlonger
114 | 	addl    %eax,%eax
115 | 	jnz	.len2b2
116 | 	lodsl
117 | 	adcl    %eax,%eax
118 | .len2b2:
119 | 	adcl    $1,%ebx
120 | 	jmp	.copy_match
121 | 
122 | .lenlonger:
123 | 	addl	$2, %ebx
124 | 	movl	$1, %edx
125 | 	xorl	%ecx, %ecx
126 | .lenloop:
127 | 	addl	%edx, %ebx
128 | 	addl	%edx, %edx
129 | 	addl    %eax,%eax
130 | 	jnz	.L26
131 | 	lodsl
132 | 	adcl    %eax,%eax
133 | .L26:
134 | 	adcl    %ecx,%ecx
135 | 
136 | 	addl    %eax,%eax
137 | 	jnz	.L29
138 | 	lodsl
139 | 	adcl    %eax,%eax
140 | .L29:
141 | 	jc	.lenloop
142 | 	addl	%ecx, %ebx
143 | .copy_match:
144 |         pushl    %esi
145 | 	movl	%edi, %esi
146 | 	movl	%ebx, %ecx
147 | 	subl	%ebp, %esi
148 | 	rep movsb
149 | 	popl    %esi
150 | 	popl    %edx
151 | 	subl	%ebx, %edx # left-=len
152 | 	jns	.checkleft
153 | .unpack_ret:
154 | 	popl    %edi # drop the saved output buffer start (shortest way to add 4 to esp)
155 | 
156 | 	popl	%ebx
157 | 	popl	%esi
158 | 	popl	%edi
159 | 	popl	%ebp
160 | 	ret
161 | 	.size	unpack_x86, .-unpack_x86
162 | 


--------------------------------------------------------------------------------
/x86/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | 	g++ -O2 -pipe x86.cpp -o x86
3 | 


--------------------------------------------------------------------------------
/x86/x86.cpp:
--------------------------------------------------------------------------------
  1 | typedef unsigned int uint;
  2 | typedef unsigned char  uc;
  3 | 
  4 | #include <stdio.h>
  5 | #include <stdlib.h>
  6 | 
  7 | uint BytesLoaded;
  8 | 
  9 | uint flen( FILE* f ) // Returns the size of f in bytes (seek to end + ftell) and rewinds f to the start.
 10 | {
 11 |   fseek( f, 0, SEEK_END );
 12 |   uint len = ftell(f); // NOTE(review): ftell returns long, and -1 on failure; both are converted to uint here
 13 |   fseek( f, 0, SEEK_SET );
 14 |   return len;
 15 | }
 16 | 
 17 | void* fload( char* fname ) // Loads the whole file fname into a new[]-allocated char buffer and returns it; returns 0 if the file cannot be opened. The size is stored in the global BytesLoaded.
 18 | {
 19 |   FILE* temp = fopen(fname,"rb");
 20 |   if (temp==0) return 0;
 21 |   unsigned int len = flen(temp);
 22 |   BytesLoaded = len;
 23 |   char* buf = new char[len];
 24 |   fread( buf, len, 1, temp ); // NOTE(review): result is not checked; after a short read part of buf stays uninitialized
 25 |   fclose( temp );
 26 |   return buf;
 27 | }
 28 | 
 29 | void fsave( void* buf, unsigned int len, char* fname ) // Writes len bytes from buf to the file fname, replacing its contents; reports via perror and returns if the file cannot be created.
 30 | {
 31 |   FILE* temp = fopen(fname,"wb");
 32 |   if (temp==0) { perror(fname); return; } // same null check as fload; fwrite on a null stream would crash
 33 |   fwrite( buf, len, 1, temp ); fclose( temp );
 34 | }
 35 | 
 36 | uint fgetd( FILE* file) // Reads a 32-bit little-endian value from file. Each byte is read in its own statement: the operands of + have no defined evaluation order, so several fgetc calls in one expression could read the bytes in any order.
 37 | {
 38 |   uint v=0; for (int s=0; s<32; s+=8) v+=(uint)fgetc(file)<<s; return v; // unsigned shifts: no signed overflow for bytes >= 0x80
 39 | }
 40 | 
 41 | uint fgetw( FILE* file) // Reads a 16-bit little-endian value from file. Each byte is read in its own statement because the operands of + have no defined evaluation order.
 42 | {
 43 |   uint v=0; for (int s=0; s<16; s+=8) v+=(uint)fgetc(file)<<s; return v;
 44 | }
 45 | 
 46 | void fputd( uint c, FILE* file ) // Writes c to file as 4 bytes, least significant byte first.
 47 | {
 48 |   fputc( c    , file ); // fputc stores only the low 8 bits of its argument
 49 |   fputc( c>> 8, file );
 50 |   fputc( c>>16, file );
 51 |   fputc( c>>24, file );
 52 | }
 53 | 
 54 | 
 55 | void fputw( uint c, FILE* file ) // Writes the low 16 bits of c to file, least significant byte first.
 56 | {
 57 |   fputc( c    , file );
 58 |   fputc( c>> 8, file );
 59 | }
 60 | #define Psh(c) ( c==0x06 || c==0x16 || c==0x1E || (c>0x4F && c<0x58) ) // true for the x86 PUSH opcodes 06/16/1E (segment registers) and 50..57 (push r32); c is evaluated several times and is not parenthesized
 61 | 
 62 | #define wswap(a) ( ((a)>>8) + (((a)&255)<<8) ) // swaps the two bytes of a 16-bit value; expects a < 0x10000
 63 | #define bswap(a) ( wswap((a)>>16)+(wswap((a)&65535)<<16) ) // reverses the byte order of an unsigned 32-bit value
 64 | //#define _bsw(a,i,h) (((uc(&)[4])(a))[i]<<(h))
 65 | //#define bswap(a) ( _bsw(a,0,24)+_bsw(a,1,16)+_bsw(a,2,8)+_bsw(a,3,0) )
 66 | 
 67 | #include <map>
 68 | std::map<int,int> cofs; // number of E8 (call) targets seen per address bucket
 69 | std::map<int,int> jofs; // number of E9 / 0F 8x (jump) targets seen per address bucket
 70 | #define mask 0xffffe000 // clears the low 13 bits: buckets are 0x2000 bytes wide
 71 | #define shift 0x1000 // half a bucket, added before masking so addresses round to the nearest bucket
 72 | int main(int argc,char* argv[])
 73 | {
 74 |   // Experimental x86 branch filter: splits argv[1] into main.dat (bytes kept in place), calls.dat and jumps.dat
 75 |   // (absolute E8 / E9 / 0F 8x targets, byte-swapped) and flags.dat (per candidate: 0 = target extracted, 1 = left inline).
 76 |   int i; uint a;
 77 |   if (argc<2) { fprintf(stderr,"usage: x86 input\n"); return 1; }
 78 |   uc* p;
 79 |   FILE* Codes = fopen("main.dat","wb");
 80 |   FILE* Calls = fopen("calls.dat","wb");
 81 | //  FILE* Calls2 = fopen("calls2.dat","wb");
 82 |   FILE* Jumps = fopen("jumps.dat","wb");
 83 |   FILE* Flags = fopen("flags.dat","wb");
 84 |   if (!Codes || !Calls || !Jumps || !Flags) { perror("x86: cannot create output file"); return 1; }
 85 |   uc* Text = (uc*)fload(argv[1]); p=Text;
 86 |   if (Text==0) { perror(argv[1]); return 1; }
 87 |   for( i=0; i<BytesLoaded; i++ ) // pass 1: count branch targets per address bucket
 88 |   { 
 89 |     if ( i+8u<=BytesLoaded && p[i]==0xE8 )  // bounds test first and without subtraction: "BytesLoaded-5-3" wrapped around for inputs under 8 bytes
 90 |     {
 91 |       a = i+5 + (uint&)p[i+1];
 92 | 
 93 |       if ( a<BytesLoaded ) {
 94 |         a+=shift;
 95 |         a&=mask;
 96 |         // operator[] value-initializes a missing bucket to 0
 97 |         cofs[a]++;
 98 |         i+=4;
 99 |       }
100 |     }
101 | 
102 |     if ( i+8u<=BytesLoaded && p[i]==0xE9 ) {
103 |       a = i+5 + (uint&)p[i+1];
104 |       if ( a<BytesLoaded ) {
105 |         a+=shift;
106 |         a&=mask;
107 |         // operator[] value-initializes a missing bucket to 0
108 |         jofs[a]++;
109 |         i+=4;
110 |       }
111 | 
112 |     } 
113 | 
114 |     if ( i+6u<=BytesLoaded && p[i]==0x0F && (p[i+1]&0xF0)==0x80 ) { // bounds test first so p[i+1] is never read past the buffer
115 |       a = i+6 + (uint&)p[i+2];
116 |       if ( a<BytesLoaded ) {
117 |         a+=shift;
118 |         a&=mask;
119 |         // operator[] value-initializes a missing bucket to 0
120 |         jofs[a]++;
121 |         i+=4;
122 |       }
123 | 
124 |     } 
125 |  
126 |   }
127 |   for( i=0; i<BytesLoaded; i++ ) // pass 2: write the streams
128 |   { 
129 | 
130 |     fputc( p[i], Codes );
131 | 
132 |     if ( i+8u<=BytesLoaded && p[i]==0xE8 )  
133 |     {
134 |       a = i+5 + (uint&)p[i+1];
135 | //result becomes in [0..n)
136 | //initially we are at [-i..n-i)
137 | //if (a<BytesLoaded+i) {
138 |       int use=0; // 1 when the target looks like a real call destination
139 | 
140 |       if ( a<BytesLoaded && cofs.count((a+shift)&mask) && cofs[(a+shift)&mask]>1 ) use=1;
141 |       if ( a<BytesLoaded && Psh(p[a]) ) use=1;
142 |       if ( a<BytesLoaded && a>0 && p[a-1]==0xc3 ) use=1;
143 | 
144 |       if ( use ) {
145 |         putc( 0x00, Flags );
146 | 
147 |           fputd( bswap(a), Calls );
148 | 
149 | //        if ( p[i+5]==0x83 && p[i+6]==0xC4 && p[i+7]>0 ) {
150 | //         fputc( 1/*p[i+7]*/, Calls2 );
151 | //         i+=2;//3;
152 | //        } else {
153 | //         fputc( 0x00, Calls2 );
154 | //        }
155 | 
156 |         i+=4;
157 |       } else {
158 |         putc( 0x01, Flags );
159 |       }
160 | //      }
161 |     }
162 | 
163 |     if ( i+8u<=BytesLoaded && p[i]==0xE9 ) {
164 |       a = i+5 + (uint&)p[i+1];
165 | //if (a<BytesLoaded+i) {
166 |       int use=0;
167 | 
168 |       if ( a<BytesLoaded && jofs.count((a+shift)&mask) && jofs[(a+shift)&mask]>1 ) use=1;
169 |       if ( a<BytesLoaded && Psh(p[a]) ) use=1;
170 |       if ( use ) {
171 |         putc( 0x00, Flags );
172 |         fputd( bswap( a ), Jumps );
173 |         i+=4;
174 |       } else {
175 |         putc( 0x01, Flags );
176 |       }
177 | //}
178 |     } 
179 | 
180 |     if ( i+6u<=BytesLoaded && p[i]==0x0F && (p[i+1]&0xF0)==0x80 ) {
181 |       a = i+6 + (uint&)p[i+2];
182 | //if (a<BytesLoaded+i) {
183 |       int use=0;
184 | 
185 |       if ( a<BytesLoaded && jofs.count((a+shift)&mask) && jofs[(a+shift)&mask]>1 ) use=1;
186 |       if ( a<BytesLoaded && Psh(p[a]) ) use=1;
187 |       if ( use ) {
188 |         putc( 0x00, Flags );
189 |         fputd( bswap( a ), Jumps );
190 |         i+=4; // NOTE(review): the instruction is 6 bytes, so this resumes on its last operand byte and the 8x byte reaches no stream - confirm intended
191 |       } else {
192 |         putc( 0x01, Flags );
193 |       }
194 | //}
195 |     } 
196 |  
197 |   }
198 |   printf("\n");
199 |  
200 | }
201 | 
202 | 


--------------------------------------------------------------------------------