├── .gitignore
├── LICENSE
├── Makefile
├── ari
│   ├── compbit.c
│   ├── complen.c
│   ├── complit.c
│   ├── port.h
│   ├── rangecod.c
│   └── rangecod.h
├── bpe.h
├── divsufsort.c
├── divsufsort.h
├── e8.h
├── lzoma.h
├── pack.c
├── readme.MSVC
├── readme.txt
├── unpack.c
├── unpack_lzoma.S
└── x86
    ├── Makefile
    └── x86.cpp
/.gitignore:
--------------------------------------------------------------------------------
1 | # Object files
2 | *.o
3 | *.ko
4 | *.obj
5 | *.elf
6 |
7 | # Precompiled Headers
8 | *.gch
9 | *.pch
10 |
11 | # Libraries
12 | *.lib
13 | *.a
14 | *.la
15 | *.lo
16 |
17 | # Shared objects (inc. Windows DLLs)
18 | *.dll
19 | *.so
20 | *.so.*
21 | *.dylib
22 |
23 | # Executables
24 | *.exe
25 | *.out
26 | *.app
27 | *.i*86
28 | *.x86_64
29 | *.hex
30 |
31 | # Debug files
32 | *.dSYM/
33 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 2, June 1991
3 |
4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6 | Everyone is permitted to copy and distribute verbatim copies
7 | of this license document, but changing it is not allowed.
8 |
9 | Preamble
10 |
11 | The licenses for most software are designed to take away your
12 | freedom to share and change it. By contrast, the GNU General Public
13 | License is intended to guarantee your freedom to share and change free
14 | software--to make sure the software is free for all its users. This
15 | General Public License applies to most of the Free Software
16 | Foundation's software and to any other program whose authors commit to
17 | using it. (Some other Free Software Foundation software is covered by
18 | the GNU Lesser General Public License instead.) You can apply it to
19 | your programs, too.
20 |
21 | When we speak of free software, we are referring to freedom, not
22 | price. Our General Public Licenses are designed to make sure that you
23 | have the freedom to distribute copies of free software (and charge for
24 | this service if you wish), that you receive source code or can get it
25 | if you want it, that you can change the software or use pieces of it
26 | in new free programs; and that you know you can do these things.
27 |
28 | To protect your rights, we need to make restrictions that forbid
29 | anyone to deny you these rights or to ask you to surrender the rights.
30 | These restrictions translate to certain responsibilities for you if you
31 | distribute copies of the software, or if you modify it.
32 |
33 | For example, if you distribute copies of such a program, whether
34 | gratis or for a fee, you must give the recipients all the rights that
35 | you have. You must make sure that they, too, receive or can get the
36 | source code. And you must show them these terms so they know their
37 | rights.
38 |
39 | We protect your rights with two steps: (1) copyright the software, and
40 | (2) offer you this license which gives you legal permission to copy,
41 | distribute and/or modify the software.
42 |
43 | Also, for each author's protection and ours, we want to make certain
44 | that everyone understands that there is no warranty for this free
45 | software. If the software is modified by someone else and passed on, we
46 | want its recipients to know that what they have is not the original, so
47 | that any problems introduced by others will not reflect on the original
48 | authors' reputations.
49 |
50 | Finally, any free program is threatened constantly by software
51 | patents. We wish to avoid the danger that redistributors of a free
52 | program will individually obtain patent licenses, in effect making the
53 | program proprietary. To prevent this, we have made it clear that any
54 | patent must be licensed for everyone's free use or not licensed at all.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | GNU GENERAL PUBLIC LICENSE
60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61 |
62 | 0. This License applies to any program or other work which contains
63 | a notice placed by the copyright holder saying it may be distributed
64 | under the terms of this General Public License. The "Program", below,
65 | refers to any such program or work, and a "work based on the Program"
66 | means either the Program or any derivative work under copyright law:
67 | that is to say, a work containing the Program or a portion of it,
68 | either verbatim or with modifications and/or translated into another
69 | language. (Hereinafter, translation is included without limitation in
70 | the term "modification".) Each licensee is addressed as "you".
71 |
72 | Activities other than copying, distribution and modification are not
73 | covered by this License; they are outside its scope. The act of
74 | running the Program is not restricted, and the output from the Program
75 | is covered only if its contents constitute a work based on the
76 | Program (independent of having been made by running the Program).
77 | Whether that is true depends on what the Program does.
78 |
79 | 1. You may copy and distribute verbatim copies of the Program's
80 | source code as you receive it, in any medium, provided that you
81 | conspicuously and appropriately publish on each copy an appropriate
82 | copyright notice and disclaimer of warranty; keep intact all the
83 | notices that refer to this License and to the absence of any warranty;
84 | and give any other recipients of the Program a copy of this License
85 | along with the Program.
86 |
87 | You may charge a fee for the physical act of transferring a copy, and
88 | you may at your option offer warranty protection in exchange for a fee.
89 |
90 | 2. You may modify your copy or copies of the Program or any portion
91 | of it, thus forming a work based on the Program, and copy and
92 | distribute such modifications or work under the terms of Section 1
93 | above, provided that you also meet all of these conditions:
94 |
95 | a) You must cause the modified files to carry prominent notices
96 | stating that you changed the files and the date of any change.
97 |
98 | b) You must cause any work that you distribute or publish, that in
99 | whole or in part contains or is derived from the Program or any
100 | part thereof, to be licensed as a whole at no charge to all third
101 | parties under the terms of this License.
102 |
103 | c) If the modified program normally reads commands interactively
104 | when run, you must cause it, when started running for such
105 | interactive use in the most ordinary way, to print or display an
106 | announcement including an appropriate copyright notice and a
107 | notice that there is no warranty (or else, saying that you provide
108 | a warranty) and that users may redistribute the program under
109 | these conditions, and telling the user how to view a copy of this
110 | License. (Exception: if the Program itself is interactive but
111 | does not normally print such an announcement, your work based on
112 | the Program is not required to print an announcement.)
113 |
114 | These requirements apply to the modified work as a whole. If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works. But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 |
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 |
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 |
134 | 3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 |
138 | a) Accompany it with the complete corresponding machine-readable
139 | source code, which must be distributed under the terms of Sections
140 | 1 and 2 above on a medium customarily used for software interchange; or,
141 |
142 | b) Accompany it with a written offer, valid for at least three
143 | years, to give any third party, for a charge no more than your
144 | cost of physically performing source distribution, a complete
145 | machine-readable copy of the corresponding source code, to be
146 | distributed under the terms of Sections 1 and 2 above on a medium
147 | customarily used for software interchange; or,
148 |
149 | c) Accompany it with the information you received as to the offer
150 | to distribute corresponding source code. (This alternative is
151 | allowed only for noncommercial distribution and only if you
152 | received the program in object code or executable form with such
153 | an offer, in accord with Subsection b above.)
154 |
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it. For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable. However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 |
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 |
172 | 4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License. Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 |
180 | 5. You are not required to accept this License, since you have not
181 | signed it. However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works. These actions are
183 | prohibited by law if you do not accept this License. Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 |
189 | 6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions. You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 |
197 | 7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License. If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all. For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 |
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 |
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices. Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 |
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 |
229 | 8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded. In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 |
237 | 9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time. Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 |
242 | Each version is given a distinguishing version number. If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation. If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 |
250 | 10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission. For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this. Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 |
258 | NO WARRANTY
259 |
260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 |
270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 |
280 | END OF TERMS AND CONDITIONS
281 |
282 | How to Apply These Terms to Your New Programs
283 |
284 | If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 |
288 | To do so, attach the following notices to the program. It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 |
293 | {description}
294 | Copyright (C) {year} {fullname}
295 |
296 | This program is free software; you can redistribute it and/or modify
297 | it under the terms of the GNU General Public License as published by
298 | the Free Software Foundation; either version 2 of the License, or
299 | (at your option) any later version.
300 |
301 | This program is distributed in the hope that it will be useful,
302 | but WITHOUT ANY WARRANTY; without even the implied warranty of
303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304 | GNU General Public License for more details.
305 |
306 | You should have received a copy of the GNU General Public License along
307 | with this program; if not, write to the Free Software Foundation, Inc.,
308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 |
310 | Also add information on how to contact you by electronic and paper mail.
311 |
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 |
315 | Gnomovision version 69, Copyright (C) year name of author
316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 | This is free software, and you are welcome to redistribute it
318 | under certain conditions; type `show c' for details.
319 |
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License. Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 |
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary. Here is a sample; alter the names:
328 |
329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 | `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 |
332 | {signature of Ty Coon}, 1 April 1989
333 | Ty Coon, President of Vice
334 |
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs. If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library. If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 |
341 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # The targets below name actions, not files, so they must always run.
2 | .PHONY: all asm_x86 test
3 |
4 | # Default build: the packer plus the portable C unpacker.
5 | all:
6 | 	gcc -O2 -pipe pack.c divsufsort.c -o pack
7 | 	gcc -Os -fomit-frame-pointer -std=c99 -pipe unpack.c -o unpack
8 |
9 | # Same packer; the unpacker is built as 32-bit code with ASM_X86 defined
10 | # and unpack_lzoma.S added to the link.
11 | asm_x86:
12 | 	gcc -O2 -pipe pack.c divsufsort.c -o pack
13 | 	gcc -DASM_X86 -m32 -Os -fomit-frame-pointer -std=c99 -pipe unpack.c unpack_lzoma.S -o unpack
14 |
15 | # Round trip: pack pack.c, unpack it again, and print both md5 sums, which
16 | # must be identical.  Uses whichever ./pack and ./unpack were built last.
17 | test:
18 | 	./pack pack.c pack.c.lzoma && ./unpack pack.c.lzoma pack.c.test && md5sum pack.c pack.c.test
19 |
--------------------------------------------------------------------------------
/ari/compbit.c:
--------------------------------------------------------------------------------
1 | /*
2 | comp.c headerfile for quasistatic probability model
3 |
4 | (c) Michael Schindler
5 | 1997, 1998, 1999, 2000
6 | http://www.compressconsult.com/
7 | michael@compressconsult.com
8 |
9 | This program is free software; you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation; either version 2 of the License, or
12 | (at your option) any later version.
13 |
14 | This program is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 | GNU General Public License for more details. It may be that this
18 | program violates local patents in your country, however it is
19 | believed (NO WARRANTY!) to be patent-free here in Austria.
20 |
21 | You should have received a copy of the GNU General Public License
22 | along with this program; if not, write to the Free Software
23 | Foundation, Inc., 59 Temple Place - Suite 330, Boston,
24 | MA 02111-1307, USA.
25 |
26 | comp is an example compressor trying to compress files with a simple
27 | order 0 model. The files can be decompressed by decomp.
28 |
29 | Note that I do not think that an order 0 model as here is good;
30 | For better compression see for example my freeware szip.
31 | http://www.compressconsult.com/szip/
32 | or ask me as consultant what compression method fits your data best.
33 | */
34 |
35 | #include
36 | #include
37 | #ifndef unix
38 | #include
39 | #include
40 | #endif
41 | #include
42 | #include
43 | #include "port.h"
44 | #include "rangecod.h"
45 |
46 | void usage() /* Print the synopsis and credit line on stderr, then terminate with status 1. */
47 | {   fputs("comp [inputfile [outputfile]]\n", stderr);
48 |     fputs("comp (c)1997.1998 Michael Schindler, michael@compressconsult\n", stderr);
49 |     exit(1);
50 | }
51 |
52 | /*
53 |    Range-code stdin (or argv[1]) to stdout (or argv[2]).
54 |    Every input byte is handed to encbit() as one "bit", all of them coded
55 |    with a single adaptive model.
56 |    NOTE(review): presumably the input consists of 0 and 1 bytes only --
57 |    confirm against whatever produces it.
58 |    Exits with status 1 on bad arguments or when a named file cannot be opened.
59 | */
60 | int main( int argc, char *argv[] )
61 | {   int ch;
62 |     rangecoder rc;
63 |
64 |     /* argv[0] is the program name: argv[1] may only be inspected when argc > 1. */
65 |     if ((argc > 3) || ((argc > 1) && (argv[1][0] == '-')))
66 |         usage();
67 |
68 |     if ( argc < 2 )
69 |         fprintf( stderr, "stdin" );
70 |     else
71 |     {   if (freopen( argv[1], "rb", stdin ) == NULL)
72 |         {   perror( argv[1] );
73 |             exit(1);
74 |         }
75 |         fprintf( stderr, "%s", argv[1] );
76 |     }
77 |     if ( argc < 3 )
78 |         fprintf( stderr, " to stdout\n" );
79 |     else
80 |     {   if (freopen( argv[2], "wb", stdout ) == NULL)
81 |         {   perror( argv[2] );
82 |             exit(1);
83 |         }
84 |         fprintf( stderr, " to %s\n", argv[2] );
85 |     }
86 |     fprintf( stderr, "%s\n", coderversion);
87 |
88 | #ifndef unix
89 |     setmode( fileno( stdin ), O_BINARY );
90 |     setmode( fileno( stdout ), O_BINARY );
91 | #endif
92 |
93 |     start_encoding(&rc,0,0);
94 |     int prop=32768;     /* model state; 32768 is the midpoint of the 0..65536 scale used by encbit() */
95 |
96 |     /* do the coding */
97 |     while ((ch=getc(stdin))!=EOF)
98 |         encbit(&rc,ch,&prop);
99 |
100 |     done_encoding(&rc);
101 |
102 |     return 0;
103 | }
104 |
--------------------------------------------------------------------------------
/ari/complen.c:
--------------------------------------------------------------------------------
1 | /*
2 | comp.c headerfile for quasistatic probability model
3 |
4 | (c) Michael Schindler
5 | 1997, 1998, 1999, 2000
6 | http://www.compressconsult.com/
7 | michael@compressconsult.com
8 |
9 | This program is free software; you can redistribute it and/or modify
10 | it under the terms of the GNU General Public License as published by
11 | the Free Software Foundation; either version 2 of the License, or
12 | (at your option) any later version.
13 |
14 | This program is distributed in the hope that it will be useful,
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 | GNU General Public License for more details. It may be that this
18 | program violates local patents in your country, however it is
19 | believed (NO WARRANTY!) to be patent-free here in Austria.
20 |
21 | You should have received a copy of the GNU General Public License
22 | along with this program; if not, write to the Free Software
23 | Foundation, Inc., 59 Temple Place - Suite 330, Boston,
24 | MA 02111-1307, USA.
25 |
26 | comp is an example compressor trying to compress files with a simple
27 | order 0 model. The files can be decompressed by decomp.
28 |
29 | Note that I do not think that an order 0 model as here is good;
30 | For better compression see for example my freeware szip.
31 | http://www.compressconsult.com/szip/
32 | or ask me as consultant what compression method fits your data best.
33 | */
34 |
35 | #include
36 | #include
37 | #ifndef unix
38 | #include
39 | #include
40 | #endif
41 | #include
42 | #include
43 | #include "port.h"
44 | #include "rangecod.h"
45 |
46 | void usage() /* Print the synopsis and credit line on stderr, then terminate with status 1. */
47 | {   fputs("comp [inputfile [outputfile]]\n", stderr);
48 |     fputs("comp (c)1997.1998 Michael Schindler, michael@compressconsult\n", stderr);
49 |     exit(1);
50 | }
51 |
52 | /*
53 |    Range-code stdin (or argv[1]) to stdout (or argv[2]).
54 |    The input is read as little-endian 32-bit values; a trailing group of
55 |    fewer than four bytes is ignored.  Each value is sent as a series of
56 |    rounds: its lowest bit, then a stop flag (1 = nothing left, 0 = more
57 |    follows).  After a 0 flag the remaining value, which cannot be zero,
58 |    is reduced by one.  Every bit and flag position has its own model.
59 |    Exits with status 1 on bad arguments or when a named file cannot be opened.
60 | */
61 | int main( int argc, char *argv[] )
62 | {   int ch1,ch2,ch3,ch4;
63 |     rangecoder rc;
64 |     /* A 32-bit value takes at most 32 rounds, each using two models. */
65 |     int prop[64];
66 |
67 |     /* argv[0] is the program name: argv[1] may only be inspected when argc > 1. */
68 |     if ((argc > 3) || ((argc > 1) && (argv[1][0] == '-')))
69 |         usage();
70 |
71 |     if ( argc < 2 )
72 |         fprintf( stderr, "stdin" );
73 |     else
74 |     {   if (freopen( argv[1], "rb", stdin ) == NULL)
75 |         {   perror( argv[1] );
76 |             exit(1);
77 |         }
78 |         fprintf( stderr, "%s", argv[1] );
79 |     }
80 |     if ( argc < 3 )
81 |         fprintf( stderr, " to stdout\n" );
82 |     else
83 |     {   if (freopen( argv[2], "wb", stdout ) == NULL)
84 |         {   perror( argv[2] );
85 |             exit(1);
86 |         }
87 |         fprintf( stderr, " to %s\n", argv[2] );
88 |     }
89 |     fprintf( stderr, "%s\n", coderversion);
90 |
91 | #ifndef unix
92 |     setmode( fileno( stdin ), O_BINARY );
93 |     setmode( fileno( stdout ), O_BINARY );
94 | #endif
95 |
96 | #define SMALL 25    /* not used by the code below */
97 |     int j;
98 |     for(j=0;j<(int)(sizeof prop/sizeof prop[0]);j++) prop[j]=32768;
99 |
100 |     start_encoding(&rc,0,0);
101 |
102 |     /* do the coding */
103 |     while (1)
104 |     {
105 |         uint4 len;      /* unsigned: assembling the value cannot overflow and the loop below always ends */
106 |         if ((ch1=getc(stdin))==EOF) break;
107 |         if ((ch2=getc(stdin))==EOF) break;
108 |         if ((ch3=getc(stdin))==EOF) break;
109 |         if ((ch4=getc(stdin))==EOF) break;
110 |         len = ((uint4)ch4 << 24) | ((uint4)ch3 << 16) | ((uint4)ch2 << 8) | (uint4)ch1;
111 |
112 |         int i=0;        /* next model to use */
113 |         for(;;) {
114 |             encbit(&rc,(int)(len&1),prop+i);i++;    /* lowest remaining bit */
115 |             len>>=1;
116 |             if (len==0) {
117 |                 encbit(&rc,1,prop+i);               /* stop flag: nothing left */
118 |                 break;
119 |             }
120 |             encbit(&rc,0,prop+i);i++;               /* stop flag: more follows */
121 |             len--;
122 |         }
123 |     }
124 |
125 |     done_encoding(&rc);
126 |
127 |     return 0;
128 | }
129 |
--------------------------------------------------------------------------------
/ari/complit.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #ifndef unix
4 | #include
5 | #include
6 | #endif
7 | #include
8 | #include
9 | #include "port.h"
10 | #include "rangecod.h"
11 |
12 | void usage() /* Print the synopsis and credit line on stderr, then terminate with status 1. */
13 | {   fputs("comp [inputfile [outputfile]]\n", stderr);
14 |     fputs("comp (c)1997.1998 Michael Schindler, michael@compressconsult\n", stderr);
15 |     exit(1);
16 | }
17 |
18 | /*
19 |    Range-code stdin (or argv[1]) to stdout (or argv[2]), one byte at a time.
20 |    Each byte is sent most-significant bit first; the model for a bit is
21 |    selected by the bits of the same byte that were already sent (prop[1]
22 |    for the first bit, prop[2..3] for the second, ... prop[128..255] for the last).
23 |    Exits with status 1 on bad arguments or when a named file cannot be opened.
24 | */
25 | int main( int argc, char *argv[] )
26 | {   int ch;
27 |     rangecoder rc;
28 |     int prop[256];      /* bit models; index 0 is never used */
29 |     int j;
30 |
31 |     /* argv[0] is the program name: argv[1] may only be inspected when argc > 1. */
32 |     if ((argc > 3) || ((argc > 1) && (argv[1][0] == '-')))
33 |         usage();
34 |
35 |     if ( argc < 2 )
36 |         fprintf( stderr, "stdin" );
37 |     else
38 |     {   if (freopen( argv[1], "rb", stdin ) == NULL)
39 |         {   perror( argv[1] );
40 |             exit(1);
41 |         }
42 |         fprintf( stderr, "%s", argv[1] );
43 |     }
44 |     if ( argc < 3 )
45 |         fprintf( stderr, " to stdout\n" );
46 |     else
47 |     {   if (freopen( argv[2], "wb", stdout ) == NULL)
48 |         {   perror( argv[2] );
49 |             exit(1);
50 |         }
51 |         fprintf( stderr, " to %s\n", argv[2] );
52 |     }
53 |     fprintf( stderr, "%s\n", coderversion);
54 |
55 | #ifndef unix
56 |     setmode( fileno( stdin ), O_BINARY );
57 |     setmode( fileno( stdout ), O_BINARY );
58 | #endif
59 |
60 |     for(j=0;j<256;j++) prop[j]=32768;
61 |
62 |     start_encoding(&rc,0,0);
63 |     /* do the coding */
64 |     while ((ch=getc(stdin))!=EOF)
65 |     {
66 |         unsigned char len = (unsigned char)ch;  /* bits still to send, next one on top */
67 |         int ctx=1;
68 |         while (ctx<256) {
69 |             int bit = len>>7;
70 |             encbit(&rc,bit,prop+ctx);
71 |             ctx = 2*ctx + bit;                  /* descend into the child chosen by this bit */
72 |             len <<= 1;
73 |         }
74 |     }
75 |     done_encoding(&rc);
76 |
77 |     return 0;
78 | }
79 |
--------------------------------------------------------------------------------
/ari/port.h:
--------------------------------------------------------------------------------
1 | #ifndef port_h
2 | #define port_h
3 | #include
4 | /* NOTE(review): the #include above has lost its header name; INT_MAX, tested below, is declared in <limits.h>. */
5 | #ifdef GCC
6 | #define Inline inline
7 | #else
8 | #define Inline __inline
9 | #endif /* Inline is `inline` when the macro GCC is defined, `__inline` otherwise */
10 | /* Pick the fixed-size types by the width of int: short/int when int is wider than 16 bits, int/long otherwise. */
11 | #if INT_MAX > 0x7FFF
12 | typedef unsigned short uint2; /* two-byte integer (large arrays) */
13 | typedef unsigned int uint4; /* four-byte integers (range needed) */
14 | #else
15 | typedef unsigned int uint2;
16 | typedef unsigned long uint4;
17 | #endif
18 |
19 | typedef unsigned int uint; /* fast unsigned integer, 2 or 4 bytes */
20 |
21 | #endif
22 |
--------------------------------------------------------------------------------
/ari/rangecod.c:
--------------------------------------------------------------------------------
1 | #define NOWARN
2 |
3 | /*
4 | define EXTRAFAST for increased speed; you lose compression and
5 | compatibility in exchange.
6 | */
7 | //#define EXTRAFAST
8 |
9 | #include
10 | #include "port.h"
11 | #include "rangecod.h"
12 |
13 | void encbit(rangecoder *rc,int bit, int *prop)
14 | {   /* Code one bit with the adaptive model *prop, then move the model toward that bit. */
15 |     const int rate = 6;           /* each update moves the state by 1/64 of the remaining distance */
16 |     int state = *prop;
17 |     int p0 = state >> 3;          /* state scaled down to the 13-bit total passed to encode_shift() */
18 |
19 |     if (!bit) {
20 |         encode_shift(rc, p0, 0, 13);
21 |         state += (65536 - state) >> rate;
22 |     } else {
23 |         encode_shift(rc, 8192 - p0, p0, 13);
24 |         state -= state >> rate;
25 |     }
26 |     /* The lowest bit of the state is cleared and `bit` is OR-ed into it. */
27 |     *prop = (state & ~1) | bit;
28 | }
29 | /* SIZE OF RANGE ENCODING CODE VALUES. */
30 |
31 | #define CODE_BITS 32
32 | #define Top_value ((code_value)1 << (CODE_BITS-1))
33 | /* With CODE_BITS == 32, Top_value is 1 << 31. */
34 |
35 | /* all IO is done by these macros - change them if you want to */
36 | /* no checking is done - do it here if you want it */
37 | /* cod is a pointer to the used rangecoder */
38 | #define outbyte(cod,x) putchar(x)
39 | #define inbyte(cod) getchar()
40 | /* As defined here, both macros ignore cod and go through putchar()/getchar(). */
41 |
42 | #ifdef RENORM95
43 | #include "renorm95.c"
44 | /* Otherwise (no RENORM95) the renormalisation constants and the version string follow. */
45 | #else
46 | #define SHIFT_BITS (CODE_BITS - 9)
47 | #define EXTRA_BITS ((CODE_BITS-2) % 8 + 1)
48 | #define Bottom_value (Top_value >> 8)
49 | /* For CODE_BITS == 32: SHIFT_BITS is 23, EXTRA_BITS is 7 and Bottom_value is 1 << 23. */
50 | #ifdef NOWARN
51 | #ifdef GLOBALRANGECODER
52 | char coderversion[]="rangecoder 1.3 NOWARN GLOBAL (c) 1997-2000 Michael Schindler";
53 | #else
54 | char coderversion[]="rangecoder 1.3 NOWARN (c) 1997-2000 Michael Schindler";
55 | #endif
56 | #else /*NOWARN*/
57 | #ifdef GLOBALRANGECODER
58 | char coderversion[]="rangecoder 1.3 GLOBAL (c) 1997-2000 Michael Schindler";
59 | #else
60 | char coderversion[]="rangecoder 1.3 (c) 1997-2000 Michael Schindler";
61 | #endif
62 | #endif /*NOWARN*/
63 | #endif /*RENORM95*/
64 | /* coderversion names the NOWARN / GLOBALRANGECODER options this file was built with. */
65 | /* RNGC, M_outbyte and M_inbyte let the code below address either the global coder or the rc argument. */
66 | #ifdef GLOBALRANGECODER
67 | /* if this is defined we'll make a global variable rngc and */
68 | /* make RNGC use that var; we'll also omit unneeded parameters */
69 | static rangecoder rngc;
70 | #define RNGC (rngc)
71 | #define M_outbyte(a) outbyte(&rngc,a)
72 | #define M_inbyte inbyte(&rngc)
73 | #define enc_normalize(rc) M_enc_normalize()
74 | #define dec_normalize(rc) M_dec_normalize()
75 | #else
76 | #define RNGC (*rc)
77 | #define M_outbyte(a) outbyte(rc,a)
78 | #define M_inbyte inbyte(rc)
79 | #endif
80 |
81 |
82 | /* rc is the range coder to be used */
83 | /* c is written as first byte in the datastream */
84 | /* one could do without c, but then you have an additional if */
85 | /* per outputbyte. */
86 | void start_encoding( rangecoder *rc, char c, int initlength )
87 | { RNGC.low = 0; /* Full code range */
88 | RNGC.range = Top_value;
89 | RNGC.buffer = c;
90 | RNGC.help = 0; /* No bytes to follow */
91 | RNGC.bytecount = initlength;
92 | }
93 |
94 |
95 | #ifndef RENORM95
96 | /* I do the normalization before I need a defined state instead of */
97 | /* after messing it up. This simplifies starting and ending. */
98 | static Inline void enc_normalize( rangecoder *rc )
99 | { while(RNGC.range <= Bottom_value) /* do we need renormalisation? */
100 | { if (RNGC.low < (code_value)0xff< output */
101 | { M_outbyte(RNGC.buffer);
102 | for(; RNGC.help; RNGC.help--)
103 | M_outbyte(0xff);
104 | RNGC.buffer = (unsigned char)(RNGC.low >> SHIFT_BITS);
105 | } else if (RNGC.low & Top_value) /* carry now, no future carry */
106 | { M_outbyte(RNGC.buffer+1);
107 | for(; RNGC.help; RNGC.help--)
108 | M_outbyte(0);
109 | RNGC.buffer = (unsigned char)(RNGC.low >> SHIFT_BITS);
110 | } else /* passes on a potential carry */
111 | #ifdef NOWARN
112 | RNGC.help++;
113 | #else
114 | if (RNGC.bytestofollow++ == 0xffffffffL)
115 | { fprintf(stderr,"Too many bytes outstanding - File too large\n");
116 | exit(1);
117 | }
118 | #endif
119 | RNGC.range <<= 8;
120 | RNGC.low = (RNGC.low<<8) & (Top_value-1);
121 | RNGC.bytecount++;
122 | }
123 | }
124 | #endif
125 |
126 |
127 | /* Encode a symbol using frequencies */
128 | /* rc is the range coder to be used */
129 | /* sy_f is the interval length (frequency of the symbol) */
130 | /* lt_f is the lower end (frequency sum of < symbols) */
131 | /* tot_f is the total interval length (total frequency sum) */
132 | /* or (faster): tot_f = (code_value)1<> shift;
153 | tmp = r * lt_f;
154 | RNGC.low += tmp;
155 | #ifdef EXTRAFAST
156 | RNGC.range = r * sy_f;
157 | #else
158 | if ((lt_f+sy_f) >> shift)
159 | RNGC.range -= tmp;
160 | else
161 | RNGC.range = r * sy_f;
162 | #endif
163 | }
164 |
165 |
166 | #ifndef RENORM95
167 | /* Finish encoding */
168 | /* rc is the range coder to be used */
169 | /* actually not that many bytes need to be output, but who */
170 | /* cares. I output them because decode will read them :) */
171 | /* the return value is the number of bytes written */
172 | uint4 done_encoding( rangecoder *rc )
173 | { uint tmp;
174 | enc_normalize(rc); /* now we have a normalized state */
175 | RNGC.bytecount += 5;
176 | if ((RNGC.low & (Bottom_value-1)) < ((RNGC.bytecount&0xffffffL)>>1))
177 | tmp = RNGC.low >> SHIFT_BITS;
178 | else
179 | tmp = (RNGC.low >> SHIFT_BITS) + 1;
180 | if (tmp > 0xff) /* we have a carry */
181 | { M_outbyte(RNGC.buffer+1);
182 | for(; RNGC.help; RNGC.help--)
183 | M_outbyte(0);
184 | } else /* no carry */
185 | { M_outbyte(RNGC.buffer);
186 | for(; RNGC.help; RNGC.help--)
187 | M_outbyte(0xff);
188 | }
189 | M_outbyte(tmp & 0xff);
190 | M_outbyte((RNGC.bytecount>>16) & 0xff);
191 | M_outbyte((RNGC.bytecount>>8) & 0xff);
192 | M_outbyte(RNGC.bytecount & 0xff);
193 | return RNGC.bytecount;
194 | }
195 |
196 |
197 | /* Start the decoder */
198 | /* rc is the range coder to be used */
199 | /* returns the char from start_encoding or EOF */
200 | int start_decoding( rangecoder *rc )
201 | { int c = M_inbyte;
202 | if (c==EOF)
203 | return EOF;
204 | RNGC.buffer = M_inbyte;
205 | RNGC.low = RNGC.buffer >> (8-EXTRA_BITS);
206 | RNGC.range = (code_value)1 << EXTRA_BITS;
207 | return c;
208 | }
209 |
210 |
211 | static Inline void dec_normalize( rangecoder *rc )
212 | { while (RNGC.range <= Bottom_value)
213 | { RNGC.low = (RNGC.low<<8) | ((RNGC.buffer<> (8-EXTRA_BITS);
216 | RNGC.range <<= 8;
217 | }
218 | }
219 | #endif
220 |
221 |
222 | /* Calculate cumulative frequency for next symbol. Does NO update!*/
223 | /* rc is the range coder to be used */
224 | /* tot_f is the total frequency */
225 | /* or: totf is (code_value)1<=tot_f ? tot_f-1 : tmp);
236 | #endif
237 | }
238 |
239 | freq decode_culshift( rangecoder *rc, freq shift )
240 | { freq tmp;
241 | dec_normalize(rc);
242 | RNGC.help = RNGC.range>>shift;
243 | tmp = RNGC.low/RNGC.help;
244 | #ifdef EXTRAFAST
245 | return tmp;
246 | #else
247 | return (tmp>>shift ? ((code_value)1<
69 | #if INT_MAX > 0xffff
70 | typedef unsigned int uint4;
71 | typedef unsigned short uint2;
72 | #else
73 | typedef unsigned long uint4;
74 | typedef unsigned int uint2;
75 | #endif
76 | #endif
77 |
78 | extern char coderversion[];
79 |
80 | typedef uint4 code_value; /* Type of an rangecode value */
81 | /* must accommodate 32 bits */
82 | /* it is highly recommended that the total frequency count is less */
83 | /* than 1 << 19 to minimize rounding effects. */
84 | /* the total frequency count MUST be less than 1<<23 */
85 |
86 | typedef uint4 freq;
87 |
88 | /* make the following private in the arithcoder object in C++ */
89 |
/* Complete state of one range coder (encoder or decoder). */
90 | typedef struct {
91 | uint4 low, /* low end of the current coding interval */
92 | range, /* length of the current coding interval */
93 | help; /* encoder: count of bytes waiting on a carry; decoder: scratch (range scaled by the total) */
94 | unsigned char buffer;/* encoder: byte held back until a carry is decided; decoder: last byte read */
95 | /* the following is used only when encoding */
96 | uint4 bytecount; /* counter for output bytes */
97 | /* insert fields you need for input/output below this line! */
98 | } rangecoder;
99 |
100 |
101 | void encbit(rangecoder *rc, int bit, int *prop);
102 | /* supply the following as methods of the arithcoder object */
103 | /* omit the first parameter then (C++) */
104 | #ifdef GLOBALRANGECODER
105 | #define start_encoding(rc,a,b) M_start_encoding(a,b)
106 | #define encode_freq(rc,a,b,c) M_encode_freq(a,b,c)
107 | #define encode_shift(rc,a,b,c) M_encode_shift(a,b,c)
108 | #define done_encoding(rc) M_done_encoding()
109 | #define start_decoding(rc) M_start_decoding()
110 | #define decode_culfreq(rc,a) M_decode_culfreq(a)
111 | #define decode_culshift(rc,a) M_decode_culshift(a)
112 | #define decode_update(rc,a,b,c) M_decode_update(a,b,c)
113 | #define decode_byte(rc) M_decode_byte()
114 | #define decode_short(rc) M_decode_short()
115 | #define done_decoding(rc) M_done_decoding()
116 | #endif
117 |
118 |
119 | /* Start the encoder */
120 | /* rc is the range coder to be used */
121 | /* c is written as first byte in the datastream (header,...) */
122 | void start_encoding( rangecoder *rc, char c, int initlength);
123 |
124 |
125 | /* Encode a symbol using frequencies */
126 | /* rc is the range coder to be used */
127 | /* sy_f is the interval length (frequency of the symbol) */
128 | /* lt_f is the lower end (frequency sum of < symbols) */
129 | /* tot_f is the total interval length (total frequency sum) */
130 | /* or (a lot faster): tot_f = 1<
2 | #include
3 |
/* One flag bit per ordered byte pair (a,b): 256*256 bits = 8192 bytes. */
/* Pair (a,b) lives in byte a*32 + b/8, bit b%8.                        */
uint8_t bpe_flags[8192];

/* Mark the pair (a,b) as present. */
static inline void set_bpe(uint8_t a,uint8_t b)
{
    const int slot = ((int)a << 5) + (b >> 3);

    bpe_flags[slot] |= (1 << (b & 7));
}

/* Mark the pair (a,b) as absent. */
static inline void unset_bpe(uint8_t a,uint8_t b)
{
    const int slot = ((int)a << 5) + (b >> 3);

    bpe_flags[slot] &= ~(1 << (b & 7));
}

/* Non-zero (the pair's bit mask, not 1) when (a,b) is marked, else 0. */
static inline int has_bpe(uint8_t a,uint8_t b)
{
    const int slot = ((int)a << 5) + (b >> 3);

    return bpe_flags[slot] & (1 << (b & 7));
}
29 |
30 | #define BPE 1024
31 | int bpe_last_ofs[BPE];
32 | int bpe_num;
33 | int bpe_head;
34 |
35 | void bpe_init() {
36 | bpe_num=0;
37 | bpe_head=0;
38 | memset(bpe_flags,0,8192);
39 | }
40 |
41 | void bpe_push(uint8_t *buf, int pos)
42 | {
43 | if (pos<2) return;
44 | uint8_t a=buf[pos-2];
45 | uint8_t b=buf[pos-1];
46 | if (has_bpe(a,b)) {
47 | return;
48 | }
49 | if (bpe_num==BPE) {
50 | int prev_pos=bpe_last_ofs[bpe_head];
51 | uint8_t pa=buf[prev_pos];
52 | uint8_t pb=buf[prev_pos+1];
53 | unset_bpe(pa,pb);
54 | }
55 | bpe_last_ofs[bpe_head++]=pos-2;
56 | if (bpe_head==BPE) bpe_head=0;
57 | if (bpe_num
28 | #include
29 | #ifdef _OPENMP
30 | # include
31 | #endif
32 | #include "divsufsort.h"
33 |
34 |
35 | /*- Constants -*/
36 | #define INLINE __inline
37 | #if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
38 | # undef ALPHABET_SIZE
39 | #endif
40 | #if !defined(ALPHABET_SIZE)
41 | # define ALPHABET_SIZE (256)
42 | #endif
43 | #define BUCKET_A_SIZE (ALPHABET_SIZE)
44 | #define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
45 | #if defined(SS_INSERTIONSORT_THRESHOLD)
46 | # if SS_INSERTIONSORT_THRESHOLD < 1
47 | # undef SS_INSERTIONSORT_THRESHOLD
48 | # define SS_INSERTIONSORT_THRESHOLD (1)
49 | # endif
50 | #else
51 | # define SS_INSERTIONSORT_THRESHOLD (8)
52 | #endif
53 | #if defined(SS_BLOCKSIZE)
54 | # if SS_BLOCKSIZE < 0
55 | # undef SS_BLOCKSIZE
56 | # define SS_BLOCKSIZE (0)
57 | # elif 32768 <= SS_BLOCKSIZE
58 | # undef SS_BLOCKSIZE
59 | # define SS_BLOCKSIZE (32767)
60 | # endif
61 | #else
62 | # define SS_BLOCKSIZE (1024)
63 | #endif
64 | /* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
65 | #if SS_BLOCKSIZE == 0
66 | # define SS_MISORT_STACKSIZE (96)
67 | #elif SS_BLOCKSIZE <= 4096
68 | # define SS_MISORT_STACKSIZE (16)
69 | #else
70 | # define SS_MISORT_STACKSIZE (24)
71 | #endif
72 | #define SS_SMERGE_STACKSIZE (32)
73 | #define TR_INSERTIONSORT_THRESHOLD (8)
74 | #define TR_STACKSIZE (64)
75 |
76 |
77 | /*- Macros -*/
78 | #ifndef SWAP
79 | # define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
80 | #endif /* SWAP */
81 | #ifndef MIN
82 | # define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
83 | #endif /* MIN */
84 | #ifndef MAX
85 | # define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
86 | #endif /* MAX */
87 | #define STACK_PUSH(_a, _b, _c, _d)\
88 | do {\
89 | stack[ssize].a = (_a), stack[ssize].b = (_b),\
90 | stack[ssize].c = (_c), stack[ssize++].d = (_d);\
91 | } while(0)
92 | #define STACK_PUSH5(_a, _b, _c, _d, _e)\
93 | do {\
94 | stack[ssize].a = (_a), stack[ssize].b = (_b),\
95 | stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
96 | } while(0)
97 | #define STACK_POP(_a, _b, _c, _d)\
98 | do {\
99 | if(ssize == 0) { return; }\
100 | (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
101 | (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
102 | } while(0)
103 | #define STACK_POP5(_a, _b, _c, _d, _e)\
104 | do {\
105 | if(ssize == 0) { return; }\
106 | (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
107 | (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
108 | } while(0)
109 | #define BUCKET_A(_c0) bucket_A[(_c0)]
110 | #if ALPHABET_SIZE == 256
111 | #define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
112 | #define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
113 | #else
114 | #define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
115 | #define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
116 | #endif
117 |
118 |
119 | /*- Private Functions -*/
120 |
/* lg_table[i] is the index of the highest set bit of i, i.e.           */
/* floor(log2(i)), for i = 1..255; lg_table[0] is -1.                   */
121 | static const int lg_table[256]= {
122 | -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
123 | 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
124 | 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
125 | 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
126 | 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
127 | 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
128 | 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
129 | 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
130 | };
131 |
132 | #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
133 |
134 | static INLINE
135 | int
136 | ss_ilg(int n) {
137 | #if SS_BLOCKSIZE == 0
138 | return (n & 0xffff0000) ?
139 | ((n & 0xff000000) ?
140 | 24 + lg_table[(n >> 24) & 0xff] :
141 | 16 + lg_table[(n >> 16) & 0xff]) :
142 | ((n & 0x0000ff00) ?
143 | 8 + lg_table[(n >> 8) & 0xff] :
144 | 0 + lg_table[(n >> 0) & 0xff]);
145 | #elif SS_BLOCKSIZE < 256
146 | return lg_table[n];
147 | #else
148 | return (n & 0xff00) ?
149 | 8 + lg_table[(n >> 8) & 0xff] :
150 | 0 + lg_table[(n >> 0) & 0xff];
151 | #endif
152 | }
153 |
154 | #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
155 |
156 | #if SS_BLOCKSIZE != 0
157 |
/* Square-root seed table used by ss_isqrt.  The entries spot-checked   */
/* (i = 0, 1, 2, 4, 16, 255) equal floor(16 * sqrt(i)); presumably that */
/* holds for every i.                                                   */
158 | static const int sqq_table[256] = {
159 | 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61,
160 | 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89,
161 | 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109,
162 | 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
163 | 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
164 | 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
165 | 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
166 | 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
167 | 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
168 | 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
169 | 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
170 | 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
171 | 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
172 | 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
173 | 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
174 | 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
175 | };
176 |
177 | static INLINE
178 | int
179 | ss_isqrt(int x) {
180 | int y, e;
181 |
182 | if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
183 | e = (x & 0xffff0000) ?
184 | ((x & 0xff000000) ?
185 | 24 + lg_table[(x >> 24) & 0xff] :
186 | 16 + lg_table[(x >> 16) & 0xff]) :
187 | ((x & 0x0000ff00) ?
188 | 8 + lg_table[(x >> 8) & 0xff] :
189 | 0 + lg_table[(x >> 0) & 0xff]);
190 |
191 | if(e >= 16) {
192 | y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
193 | if(e >= 24) { y = (y + 1 + x / y) >> 1; }
194 | y = (y + 1 + x / y) >> 1;
195 | } else if(e >= 8) {
196 | y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
197 | } else {
198 | return sqq_table[x] >> 4;
199 | }
200 |
201 | return (x < (y * y)) ? y - 1 : y;
202 | }
203 |
204 | #endif /* SS_BLOCKSIZE != 0 */
205 |
206 |
207 | /*---------------------------------------------------------------------------*/
208 |
/* Compares two substrings of T, skipping their first `depth` bytes.    */
/* The substring for p starts at T + p[0] and ends at T + p[1] + 2, so  */
/* p must have a readable successor entry.  Returns <0, 0 or >0; a      */
/* substring that is a proper prefix of the other compares as smaller.  */
static INLINE
int
ss_compare(const unsigned char *T,
           const int *p1, const int *p2,
           int depth) {
  const unsigned char *s1 = T + depth + p1[0];
  const unsigned char *s2 = T + depth + p2[0];
  const unsigned char *const end1 = T + p1[1] + 2;
  const unsigned char *const end2 = T + p2[1] + 2;

  while((s1 < end1) && (s2 < end2) && (*s1 == *s2)) {
    ++s1;
    ++s2;
  }

  if(s1 < end1) {
    if(s2 < end2) { return *s1 - *s2; }
    return 1;
  }
  return (s2 < end2) ? -1 : 0;
}
229 |
230 |
231 | /*---------------------------------------------------------------------------*/
232 |
233 | #if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
234 |
235 | /* Insertionsort for small size groups */
/* Sorts the entries of [first, last) by ss_compare on PA + entry,      */
/* working from the right: each *i is inserted into the already sorted  */
/* tail (i, last).  Negative entries are never compared; they are       */
/* shifted along with the entry before them.  When the inserted entry   */
/* compares equal to *j, *j is stored complemented (~).                 */
236 | static
237 | void
238 | ss_insertionsort(const unsigned char *T, const int *PA,
239 | int *first, int *last, int depth) {
240 | int *i, *j;
241 | int t;
242 | int r;
243 |
244 | for(i = last - 2; first <= i; --i) {
/* t is the entry being inserted; loop while it sorts after *j */
245 | for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
/* shift *j left, together with any negative entries that follow it */
246 | do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
247 | if(last <= j) { break; }
248 | }
/* tie with *j: flag *j by complementing it */
249 | if(r == 0) { *j = ~*j; }
250 | *(j - 1) = t;
251 | }
252 | }
253 |
254 | #endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
255 |
256 |
257 | /*---------------------------------------------------------------------------*/
258 |
259 | #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
260 |
/* Sift SA[i] down a max-heap of `size` entries ordered by the key      */
/* Td[PA[entry]].  The right child SA[2*i+2] is always read, so callers */
/* must keep that slot readable (ss_heapsort passes an odd size).       */
static INLINE
void
ss_fixdown(const unsigned char *Td, const int *PA,
           int *SA, int i, int size) {
  const int moved = SA[i];
  const int moved_key = Td[PA[moved]];
  int left;

  while((left = 2 * i + 1) < size) {
    const int right = left + 1;
    const int right_key = Td[PA[SA[right]]];
    int child = left;
    int child_key = Td[PA[SA[left]]];

    /* descend towards the child with the larger key */
    if(child_key < right_key) {
      child = right;
      child_key = right_key;
    }
    if(child_key <= moved_key) { break; }

    SA[i] = SA[child];
    i = child;
  }
  SA[i] = moved;
}
276 |
/* Heapsort of SA[0..size) by the key Td[PA[entry]], ascending.         */
/* The heap always has an odd number of entries: for an even size the   */
/* last entry is kept outside the heap and folded in after the build.   */
static
void
ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) {
  const int even = ((size % 2) == 0);
  int heap = size;
  int k;
  int held;

  if(even) {
    heap = size - 1;
    if(Td[PA[SA[heap / 2]]] < Td[PA[SA[heap]]]) {
      held = SA[heap];
      SA[heap] = SA[heap / 2];
      SA[heap / 2] = held;
    }
  }

  /* build the heap bottom-up */
  for(k = heap / 2 - 1; k >= 0; --k) {
    ss_fixdown(Td, PA, SA, k, heap);
  }

  if(even) {
    held = SA[0];
    SA[0] = SA[heap];
    SA[heap] = held;
    ss_fixdown(Td, PA, SA, 0, heap);
  }

  /* repeatedly move the maximum to the end of the shrinking heap */
  for(k = heap - 1; k > 0; --k) {
    held = SA[0];
    SA[0] = SA[k];
    ss_fixdown(Td, PA, SA, 0, k);
    SA[k] = held;
  }
}
298 |
299 |
300 | /*---------------------------------------------------------------------------*/
301 |
/* Returns whichever of v1, v2, v3 points at the entry whose key        */
/* Td[PA[entry]] is the median of the three.                            */
static INLINE
int *
ss_median3(const unsigned char *Td, const int *PA,
           int *v1, int *v2, int *v3) {
  int *lo = v1;
  int *hi = v2;

  /* order the first two candidates */
  if(Td[PA[*lo]] > Td[PA[*hi]]) {
    lo = v2;
    hi = v1;
  }

  if(Td[PA[*hi]] <= Td[PA[*v3]]) { return hi; }
  return (Td[PA[*lo]] > Td[PA[*v3]]) ? lo : v3;
}
315 |
/* Returns whichever of v1..v5 points at the entry whose key            */
/* Td[PA[entry]] is the median of the five.  Only the local pointer     */
/* variables are exchanged; the entries themselves are not moved.       */
static INLINE
int *
ss_median5(const unsigned char *Td, const int *PA,
           int *v1, int *v2, int *v3, int *v4, int *v5) {
#define SS_KEY_(p) (Td[PA[*(p)]])
  int *held;

  if(SS_KEY_(v2) > SS_KEY_(v3)) { held = v2; v2 = v3; v3 = held; }
  if(SS_KEY_(v4) > SS_KEY_(v5)) { held = v4; v4 = v5; v5 = held; }
  if(SS_KEY_(v2) > SS_KEY_(v4)) {
    held = v2; v2 = v4; v4 = held;
    held = v3; v3 = v5; v5 = held;
  }
  if(SS_KEY_(v1) > SS_KEY_(v3)) { held = v1; v1 = v3; v3 = held; }
  if(SS_KEY_(v1) > SS_KEY_(v4)) {
    held = v1; v1 = v4; v4 = held;
    held = v3; v3 = v5; v5 = held;
  }
  return (SS_KEY_(v3) > SS_KEY_(v4)) ? v4 : v3;
#undef SS_KEY_
}
330 |
331 | /* Returns the pivot element. */
332 | static INLINE
333 | int *
334 | ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) {
335 | int *middle;
336 | int t;
337 |
338 | t = last - first;
339 | middle = first + t / 2;
340 |
341 | if(t <= 512) {
342 | if(t <= 32) {
343 | return ss_median3(Td, PA, first, middle, last - 1);
344 | } else {
345 | t >>= 2;
346 | return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
347 | }
348 | }
349 | t >>= 3;
350 | first = ss_median3(Td, PA, first, first + t, first + (t << 1));
351 | middle = ss_median3(Td, PA, middle - t, middle, middle + t);
352 | last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
353 | return ss_median3(Td, PA, first, middle, last);
354 | }
355 |
356 |
357 | /*---------------------------------------------------------------------------*/
358 |
359 | /* Binary partition for substrings. */
/* Splits [first, last) in two by the test                              */
/*   PA[e] + depth >= PA[e + 1] + 1                                     */
/* on each entry e: entries that satisfy it are gathered at the front   */
/* and stored complemented (~), the others stay at the back unchanged.  */
/* Returns a pointer to the first entry of the back group.  If the      */
/* front group is not empty, *first is complemented once more.          */
360 | static INLINE
361 | int *
362 | ss_partition(const int *PA,
363 | int *first, int *last, int depth) {
364 | int *a, *b;
365 | int t;
366 | for(a = first - 1, b = last;;) {
/* advance a over entries that pass the test, complementing each */
367 | for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
/* retreat b over entries that fail the test */
368 | for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { }
369 | if(b <= a) { break; }
/* exchange the misplaced pair; the entry moved to the front is complemented */
370 | t = ~*b;
371 | *b = *a;
372 | *a = t;
373 | }
374 | if(first < a) { *first = ~*first; }
375 | return a;
376 | }
377 |
378 | /* Multikey introsort for medium size groups. */
/* Sorts the entries of [first, last), comparing one byte at a time:    */
/* the key of an entry e at the current depth is Td[PA[e]] with         */
/* Td = T + depth.  Each round does a three-way split around a pivot    */
/* key; the part whose keys equal the pivot is sorted further at        */
/* depth + 1.  Recursion is replaced by an explicit stack.  `limit`     */
/* starts at ss_ilg(range size) and is decremented per round; when it   */
/* reaches 0 the range is sorted by the current key with ss_heapsort    */
/* and then split into runs of equal keys.  Ranges of at most           */
/* SS_INSERTIONSORT_THRESHOLD entries go to ss_insertionsort.           */
379 | static
380 | void
381 | ss_mintrosort(const unsigned char *T, const int *PA,
382 | int *first, int *last,
383 | int depth) {
384 | #define STACK_SIZE SS_MISORT_STACKSIZE
/* pending range: a = first, b = last, c = depth, d = limit */
385 | struct { int *a, *b, c; int d; } stack[STACK_SIZE];
386 | const unsigned char *Td;
387 | int *a, *b, *c, *d, *e, *f;
388 | int s, t;
389 | int ssize;
390 | int limit;
391 | int v, x = 0;
392 |
393 | for(ssize = 0, limit = ss_ilg(last - first);;) {
394 |
/* small range: finish it, then resume a pending range (STACK_POP returns when none is left) */
395 | if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
396 | #if 1 < SS_INSERTIONSORT_THRESHOLD
397 | if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
398 | #endif
399 | STACK_POP(first, last, depth, limit);
400 | continue;
401 | }
402 |
403 | Td = T + depth;
/* depth budget used up: order the whole range by the current key */
404 | if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
/* limit < 0 marks a range already ordered by the current key */
405 | if(limit < 0) {
/* find the end a of the first run of two or more equal keys, moving first past single entries */
406 | for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
407 | if((x = Td[PA[*a]]) != v) {
408 | if(1 < (a - first)) { break; }
409 | v = x;
410 | first = a;
411 | }
412 | }
413 | if(Td[PA[*first] - 1] < v) {
414 | first = ss_partition(PA, first, a, depth);
415 | }
/* continue with the smaller of the run [first, a) (at depth + 1) and the rest [a, last) */
416 | if((a - first) <= (last - a)) {
417 | if(1 < (a - first)) {
418 | STACK_PUSH(a, last, depth, -1);
419 | last = a, depth += 1, limit = ss_ilg(a - first);
420 | } else {
421 | first = a, limit = -1;
422 | }
423 | } else {
424 | if(1 < (last - a)) {
425 | STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
426 | first = a, limit = -1;
427 | } else {
428 | last = a, depth += 1, limit = ss_ilg(a - first);
429 | }
430 | }
431 | continue;
432 | }
433 |
434 | /* choose pivot */
435 | a = ss_pivot(Td, PA, first, last);
436 | v = Td[PA[*a]];
437 | SWAP(*first, *a);
438 |
439 | /* partition */
/* entries whose key equals v are parked at both ends: [first, a) and (d, last) */
440 | for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
441 | if(((a = b) < last) && (x < v)) {
442 | for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
443 | if(x == v) { SWAP(*b, *a); ++a; }
444 | }
445 | }
446 | for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
447 | if((b < (d = c)) && (x > v)) {
448 | for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
449 | if(x == v) { SWAP(*c, *d); --d; }
450 | }
451 | }
452 | for(; b < c;) {
453 | SWAP(*b, *c);
454 | for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
455 | if(x == v) { SWAP(*b, *a); ++a; }
456 | }
457 | for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
458 | if(x == v) { SWAP(*c, *d); --d; }
459 | }
460 | }
461 |
462 | if(a <= d) {
463 | c = b - 1;
464 |
/* move the parked equal-key entries from both ends into the middle */
465 | if((s = a - first) > (t = b - a)) { s = t; }
466 | for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
467 | if((s = d - c) > (t = last - d - 1)) { s = t; }
468 | for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
469 |
/* now [first, a) holds smaller keys, [a, c) keys equal to v, [c, last) larger keys */
470 | a = first + (b - a), c = last - (d - c);
471 | b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
472 |
/* continue with the smallest of the three parts and push the other two, the largest first */
473 | if((a - first) <= (last - c)) {
474 | if((last - c) <= (c - b)) {
475 | STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
476 | STACK_PUSH(c, last, depth, limit);
477 | last = a;
478 | } else if((a - first) <= (c - b)) {
479 | STACK_PUSH(c, last, depth, limit);
480 | STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
481 | last = a;
482 | } else {
483 | STACK_PUSH(c, last, depth, limit);
484 | STACK_PUSH(first, a, depth, limit);
485 | first = b, last = c, depth += 1, limit = ss_ilg(c - b);
486 | }
487 | } else {
488 | if((a - first) <= (c - b)) {
489 | STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
490 | STACK_PUSH(first, a, depth, limit);
491 | first = c;
492 | } else if((last - c) <= (c - b)) {
493 | STACK_PUSH(first, a, depth, limit);
494 | STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
495 | first = c;
496 | } else {
497 | STACK_PUSH(first, a, depth, limit);
498 | STACK_PUSH(c, last, depth, limit);
499 | first = b, last = c, depth += 1, limit = ss_ilg(c - b);
500 | }
501 | }
502 | } else {
/* every key in the range equals v: undo the limit decrement and go one byte deeper */
503 | limit += 1;
504 | if(Td[PA[*first] - 1] < v) {
505 | first = ss_partition(PA, first, last, depth);
506 | limit = ss_ilg(last - first);
507 | }
508 | depth += 1;
509 | }
510 | }
511 | #undef STACK_SIZE
512 | }
513 |
514 | #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
515 |
516 |
517 | /*---------------------------------------------------------------------------*/
518 |
519 | #if SS_BLOCKSIZE != 0
520 |
/* Exchanges a[k] with b[k] for k = 0 .. n-1, in ascending order of k.  */
/* Does nothing when n <= 0.                                            */
static INLINE
void
ss_blockswap(int *a, int *b, int n) {
  int k;

  for(k = 0; k < n; ++k) {
    const int held = a[k];
    a[k] = b[k];
    b[k] = held;
  }
}
529 |
/* In-place exchange of the adjacent blocks [first, middle) (length l)  */
/* and [middle, last) (length r).  Equal lengths are handled by a       */
/* single ss_blockswap; otherwise entries are cycled through one saved  */
/* value t, shrinking the unfinished part from the side of the longer   */
/* block, until one length reaches 0 or the two become equal.           */
/* NOTE(review): read as a rotation from the name and the l == r case   */
/* - confirm against callers.                                           */
530 | static INLINE
531 | void
532 | ss_rotate(int *first, int *middle, int *last) {
533 | int *a, *b, t;
534 | int l, r;
535 | l = middle - first, r = last - middle;
536 | for(; (0 < l) && (0 < r);) {
537 | if(l == r) { ss_blockswap(first, middle, l); break; }
538 | if(l < r) {
/* right block is longer: work backwards from the end */
539 | a = last - 1, b = middle - 1;
540 | t = *a;
541 | do {
542 | *a-- = *b, *b-- = *a;
543 | if(b < first) {
544 | *a = t;
545 | last = a;
546 | if((r -= l + 1) <= l) { break; }
547 | a -= 1, b = middle - 1;
548 | t = *a;
549 | }
550 | } while(1);
551 | } else {
/* left block is longer: work forwards from the start */
552 | a = first, b = middle;
553 | t = *a;
554 | do {
555 | *a++ = *b, *b++ = *a;
556 | if(last <= b) {
557 | *a = t;
558 | first = a + 1;
559 | if((l -= r + 1) <= r) { break; }
560 | a += 1, b = middle;
561 | t = *a;
562 | }
563 | } while(1);
564 | }
565 | }
566 | }
567 |
568 |
569 | /*---------------------------------------------------------------------------*/
570 |
/* Merges the adjacent runs [first, middle) and [middle, last) without  */
/* a buffer.  Each round takes the last entry of the right run, binary  */
/* searches the left run for its position with ss_compare, and moves    */
/* the tail of the left run behind it with ss_rotate.  Negative entries */
/* are complemented values: they are decoded with ~ before use, and an  */
/* entry that compares equal to the searched one is stored complemented.*/
571 | static
572 | void
573 | ss_inplacemerge(const unsigned char *T, const int *PA,
574 | int *first, int *middle, int *last,
575 | int depth) {
576 | const int *p;
577 | int *a, *b;
578 | int len, half;
579 | int q, r;
580 | int x;
581 |
582 | for(;;) {
/* p = key of the last entry of the right run; x records whether it was stored complemented */
583 | if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
584 | else { x = 0; p = PA + *(last - 1); }
/* binary search in [first, middle); r keeps the last non-negative comparison result */
585 | for(a = first, len = middle - first, half = len >> 1, r = -1;
586 | 0 < len;
587 | len = half, half >>= 1) {
588 | b = a + half;
589 | q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
590 | if(q < 0) {
591 | a = b + 1;
592 | half -= (len & 1) ^ 1;
593 | } else {
594 | r = q;
595 | }
596 | }
597 | if(a < middle) {
598 | if(r == 0) { *a = ~*a; }
599 | ss_rotate(a, middle, last);
600 | last -= middle - a;
601 | middle = a;
602 | if(first == middle) { break; }
603 | }
/* drop the entry just placed; if it was complemented, also skip the negative entries before it */
604 | --last;
605 | if(x != 0) { while(*--last < 0) { } }
606 | if(middle == last) { break; }
607 | }
608 | }
609 |
610 |
611 | /*---------------------------------------------------------------------------*/
612 |
613 | /* Merge-forward with internal buffer. */
/* Merges the runs [first, middle) and [middle, last) front to back.    */
/* The left run is first exchanged into buf with ss_blockswap; after    */
/* that every write to the output position a is paired with a write of  */
/* the displaced entry back to the source slot, so the entries that     */
/* were in buf are kept (in some order) rather than lost.  Negative     */
/* entries are copied along with the entry before them without being    */
/* compared; on a tie the right-hand entry is stored complemented.      */
/* buf must have room for middle - first entries.                       */
614 | static
615 | void
616 | ss_mergeforward(const unsigned char *T, const int *PA,
617 | int *first, int *middle, int *last,
618 | int *buf, int depth) {
619 | int *a, *b, *c, *bufend;
620 | int t;
621 | int r;
622 |
623 | bufend = buf + (middle - first) - 1;
624 | ss_blockswap(buf, first, middle - first);
625 |
/* a = output position, b = next buffered (left) entry, c = next right entry, t = displaced entry still owed to buf */
626 | for(t = *(a = first), b = buf, c = middle;;) {
627 | r = ss_compare(T, PA + *b, PA + *c, depth);
628 | if(r < 0) {
629 | do {
630 | *a++ = *b;
631 | if(bufend <= b) { *bufend = t; return; }
632 | *b++ = *a;
633 | } while(*b < 0);
634 | } else if(r > 0) {
635 | do {
636 | *a++ = *c, *c++ = *a;
637 | if(last <= c) {
/* right run exhausted: flush what is left of the buffer */
638 | while(b < bufend) { *a++ = *b, *b++ = *a; }
639 | *a = *b, *b = t;
640 | return;
641 | }
642 | } while(*c < 0);
643 | } else {
/* tie: flag the right entry, then emit the left entry followed by the right one */
644 | *c = ~*c;
645 | do {
646 | *a++ = *b;
647 | if(bufend <= b) { *bufend = t; return; }
648 | *b++ = *a;
649 | } while(*b < 0);
650 |
651 | do {
652 | *a++ = *c, *c++ = *a;
653 | if(last <= c) {
654 | while(b < bufend) { *a++ = *b, *b++ = *a; }
655 | *a = *b, *b = t;
656 | return;
657 | }
658 | } while(*c < 0);
659 | }
660 | }
661 | }
662 |
663 | /* Merge-backward with internal buffer. */
/* Merges the runs [first, middle) and [middle, last) back to front.    */
/* The right run is first exchanged into buf with ss_blockswap; the     */
/* output position a then moves down from last - 1, and each displaced  */
/* entry is written back to the slot just consumed.  p1 / p2 are the    */
/* keys of the current buffered / left entries.  Bit 1 of x is set      */
/* while the current buffered entry is stored complemented, bit 2       */
/* likewise for the current left entry; such an entry is copied         */
/* together with the negative entries before it.  On a tie the buffered */
/* entry is written complemented.                                       */
/* buf must have room for last - middle entries.                        */
664 | static
665 | void
666 | ss_mergebackward(const unsigned char *T, const int *PA,
667 | int *first, int *middle, int *last,
668 | int *buf, int depth) {
669 | const int *p1, *p2;
670 | int *a, *b, *c, *bufend;
671 | int t;
672 | int r;
673 | int x;
674 |
675 | bufend = buf + (last - middle) - 1;
676 | ss_blockswap(buf, middle, last - middle);
677 |
678 | x = 0;
679 | if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; }
680 | else { p1 = PA + *bufend; }
681 | if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
682 | else { p2 = PA + *(middle - 1); }
/* a = output position, b = current buffered entry, c = current left entry */
683 | for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
684 | r = ss_compare(T, p1, p2, depth);
685 | if(0 < r) {
/* buffered entry is larger: emit it */
686 | if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
687 | *a-- = *b;
688 | if(b <= buf) { *buf = t; break; }
689 | *b-- = *a;
690 | if(*b < 0) { p1 = PA + ~*b; x |= 1; }
691 | else { p1 = PA + *b; }
692 | } else if(r < 0) {
/* left entry is larger: emit it */
693 | if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
694 | *a-- = *c, *c-- = *a;
695 | if(c < first) {
/* left run exhausted: flush what is left of the buffer */
696 | while(buf < b) { *a-- = *b, *b-- = *a; }
697 | *a = *b, *b = t;
698 | break;
699 | }
700 | if(*c < 0) { p2 = PA + ~*c; x |= 2; }
701 | else { p2 = PA + *c; }
702 | } else {
/* tie: emit the buffered entry complemented, then the left entry */
703 | if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
704 | *a-- = ~*b;
705 | if(b <= buf) { *buf = t; break; }
706 | *b-- = *a;
707 | if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
708 | *a-- = *c, *c-- = *a;
709 | if(c < first) {
710 | while(buf < b) { *a-- = *b, *b-- = *a; }
711 | *a = *b, *b = t;
712 | break;
713 | }
714 | if(*b < 0) { p1 = PA + ~*b; x |= 1; }
715 | else { p1 = PA + *b; }
716 | if(*c < 0) { p2 = PA + ~*c; x |= 2; }
717 | else { p2 = PA + *c; }
718 | }
719 | }
720 | }
721 |
722 | /* D&C based merge. */
/* Merges the runs [first, middle) and [middle, last) using a buffer of */
/* bufsize entries.  When one run fits in the buffer it is merged       */
/* directly (ss_mergebackward if the right run fits, else               */
/* ss_mergeforward if the left run fits).  Otherwise a binary search    */
/* finds m, the number of entries to exchange across middle; the two    */
/* blocks of m entries are swapped and the two halves are merged        */
/* separately, one immediately and one via the explicit stack.          */
/* `check` carries the fix-ups to apply once a sub-merge is finished    */
/* (see MERGE_CHECK):                                                   */
/*   bit 1: complement *first unconditionally                           */
/*   bit 2: complement *first if it compares equal to its predecessor   */
/*   bit 4: complement *last if it compares equal to its predecessor    */
723 | static
724 | void
725 | ss_swapmerge(const unsigned char *T, const int *PA,
726 | int *first, int *middle, int *last,
727 | int *buf, int bufsize, int depth) {
728 | #define STACK_SIZE SS_SMERGE_STACKSIZE
729 | #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
730 | #define MERGE_CHECK(a, b, c)\
731 | do {\
732 | if(((c) & 1) ||\
733 | (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
734 | *(a) = ~*(a);\
735 | }\
736 | if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
737 | *(b) = ~*(b);\
738 | }\
739 | } while(0)
/* pending merge: a = first, b = middle, c = last, d = check */
740 | struct { int *a, *b, *c; int d; } stack[STACK_SIZE];
741 | int *l, *r, *lm, *rm;
742 | int m, len, half;
743 | int ssize;
744 | int check, next;
745 |
746 | for(check = 0, ssize = 0;;) {
/* right run fits in the buffer */
747 | if((last - middle) <= bufsize) {
748 | if((first < middle) && (middle < last)) {
749 | ss_mergebackward(T, PA, first, middle, last, buf, depth);
750 | }
751 | MERGE_CHECK(first, last, check);
752 | STACK_POP(first, middle, last, check);
753 | continue;
754 | }
755 |
/* left run fits in the buffer */
756 | if((middle - first) <= bufsize) {
757 | if(first < middle) {
758 | ss_mergeforward(T, PA, first, middle, last, buf, depth);
759 | }
760 | MERGE_CHECK(first, last, check);
761 | STACK_POP(first, middle, last, check);
762 | continue;
763 | }
764 |
/* binary search for m: entries middle+k and middle-k-1 are compared, moving outwards from middle */
765 | for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
766 | 0 < len;
767 | len = half, half >>= 1) {
768 | if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
769 | PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
770 | m += half + 1;
771 | half -= (len & 1) ^ 1;
772 | }
773 | }
774 |
775 | if(0 < m) {
/* exchange [middle - m, middle) with [middle, middle + m) */
776 | lm = middle - m, rm = middle + m;
777 | ss_blockswap(lm, middle, m);
778 | l = r = middle, next = 0;
779 | if(rm < last) {
780 | if(*rm < 0) {
781 | *rm = ~*rm;
782 | if(first < lm) { for(; *--l < 0;) { } next |= 4; }
783 | next |= 1;
784 | } else if(first < lm) {
785 | for(; *r < 0; ++r) { }
786 | next |= 2;
787 | }
788 | }
789 |
/* continue with the shorter half, push the longer one */
790 | if((l - first) <= (last - r)) {
791 | STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
792 | middle = lm, last = l, check = (check & 3) | (next & 4);
793 | } else {
794 | if((next & 2) && (r == middle)) { next ^= 6; }
795 | STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
796 | first = r, middle = rm, check = (next & 3) | (check & 4);
797 | }
798 | } else {
/* nothing to exchange: the runs are already in order; only flag a tie across the boundary */
799 | if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
800 | *middle = ~*middle;
801 | }
802 | MERGE_CHECK(first, last, check);
803 | STACK_POP(first, middle, last, check);
804 | }
805 | }
806 | #undef STACK_SIZE
807 | }
808 |
809 | #endif /* SS_BLOCKSIZE != 0 */
810 |
811 |
812 | /*---------------------------------------------------------------------------*/
813 |
814 | /* Substring sort.
      *
      * Sorts the entries of [first, last) with the ss_* routines: individual
      * blocks go through ss_mintrosort / ss_insertionsort and are combined with
      * ss_swapmerge / ss_inplacemerge. T, PA and depth are passed through to
      * those routines unchanged.
      *
      * buf, bufsize: scratch area handed to ss_swapmerge. When SS_BLOCKSIZE != 0
      *   and bufsize is smaller than SS_BLOCKSIZE, than the range length and
      *   than ss_isqrt(range length), the last `limit` slots of the range itself
      *   are used as the buffer instead and merged back in at the end.
      * n: only used to build the second key (n - 2) for the final insertion.
      * lastsuffix: when non-zero, *first is held back from the sort and inserted
      *   afterwards by a linear scan.
      */
815 | static
816 | void
817 | sssort(const unsigned char *T, const int *PA,
818 | int *first, int *last,
819 | int *buf, int bufsize,
820 | int depth, int n, int lastsuffix) {
821 | int *a;
822 | #if SS_BLOCKSIZE != 0
823 | int *b, *middle, *curbuf;
824 | int j, k, curbufsize, limit;
825 | #endif
826 | int i;
827 | /* Hold back the first entry; it is re-inserted at the end of the function. */
828 | if(lastsuffix != 0) { ++first; }
829 |
830 | #if SS_BLOCKSIZE == 0
831 | ss_mintrosort(T, PA, first, last, depth);
832 | #else
      /* Caller's buffer too small: carve `limit` slots (at most SS_BLOCKSIZE)
         off the end of the range and use them as the merge buffer. */
833 | if((bufsize < SS_BLOCKSIZE) &&
834 | (bufsize < (last - first)) &&
835 | (bufsize < (limit = ss_isqrt(last - first)))) {
836 | if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
837 | buf = middle = last - limit, bufsize = limit;
838 | } else {
839 | middle = last, limit = 0;
840 | }
      /* Sort [first, middle) one SS_BLOCKSIZE block at a time; i counts the
         blocks completed so far. */
841 | for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
842 | #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
843 | ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
844 | #elif 1 < SS_BLOCKSIZE
845 | ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
846 | #endif
      /* Use the region after this block (up to last) as scratch when it is
         larger than buf; otherwise fall back to buf. */
847 | curbufsize = last - (a + SS_BLOCKSIZE);
848 | curbuf = a + SS_BLOCKSIZE;
849 | if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
      /* For every trailing 1 bit of i, merge the run of k entries to the left
         of b with the k entries starting at b; k doubles after each merge. */
850 | for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
851 | ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
852 | }
853 | }
      /* Sort the final, possibly shorter, block [a, middle). */
854 | #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
855 | ss_mintrosort(T, PA, a, middle, depth);
856 | #elif 1 < SS_BLOCKSIZE
857 | ss_insertionsort(T, PA, a, middle, depth);
858 | #endif
      /* Merge the runs still pending: each set bit of i stands for a run of k
         entries directly to the left of a. */
859 | for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
860 | if(i & 1) {
861 | ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
862 | a -= k;
863 | }
864 | }
      /* The slots borrowed as buffer still have to be sorted and merged in. */
865 | if(limit != 0) {
866 | #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
867 | ss_mintrosort(T, PA, middle, last, depth);
868 | #elif 1 < SS_BLOCKSIZE
869 | ss_insertionsort(T, PA, middle, last, depth);
870 | #endif
871 | ss_inplacemerge(T, PA, first, middle, last, depth);
872 | }
873 | #endif
874 |
875 | if(lastsuffix != 0) {
876 | /* Insert last type B* suffix. */
      /* PAi is a two-entry stand-in passed to ss_compare where a pointer into
         PA is normally given: { PA[held-back entry], n - 2 }. */
877 | int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
      /* Shift entries one slot left while they are negative or compare below
         the held-back entry, then drop the held-back entry into the gap. */
878 | for(a = first, i = *(first - 1);
879 | (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
880 | ++a) {
881 | *(a - 1) = *a;
882 | }
883 | *(a - 1) = i;
884 | }
885 | }
886 |
887 |
888 | /*---------------------------------------------------------------------------*/
889 |
890 | static INLINE
891 | int
892 | tr_ilg(int n) {
893 |   /* Find the bit offset of the highest non-zero byte of n, then add the
894 |      lg_table entry for that byte (presumably its integer log2 -- the
895 |      table is defined elsewhere in this file). */
896 |   int shift;
897 |   if(n & 0xffff0000) { shift = (n & 0xff000000) ? 24 : 16; }
898 |   else { shift = (n & 0x0000ff00) ? 8 : 0; }
899 |   return shift + lg_table[(n >> shift) & 0xff];
900 | }
901 |
902 |
903 | /*---------------------------------------------------------------------------*/
904 |
905 | /* Simple insertionsort for small size groups.
      *
      * Orders the entries of [first, last) by the key ISAd[entry]. Ties are
      * recorded in place: when the inserted entry has the same key as the entry
      * it lands behind, that earlier entry is overwritten with its bitwise
      * complement (a negative value). While shifting, a non-negative entry and
      * the negative entries directly in front of it are moved together.
      */
906 | static
907 | void
908 | tr_insertionsort(const int *ISAd, int *first, int *last) {
909 | int *a, *b;
910 | int t, r;
911 |
912 | for(a = first + 1; a < last; ++a) {
      /* r is the key difference between the entry being inserted (t) and *b. */
913 | for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
      /* Shift *b right, then keep shifting while the entries before it are negative. */
914 | do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
915 | if(b < first) { break; }
916 | }
      /* Equal keys: complement the predecessor to mark the tie. */
917 | if(r == 0) { *b = ~*b; }
918 | *(b + 1) = t;
919 | }
920 | }
921 |
922 |
923 | /*---------------------------------------------------------------------------*/
924 | /* Sift-down step for a max-heap stored in SA[0..size) and ordered by the
      * key ISAd[SA[.]]. The entry at index i is moved down, each time pulling up
      * the child with the larger key, until no child has a key greater than its
      * own.
      *
      * NOTE(review): the right child SA[2*i + 2] is read without comparing its
      * index against size, so SA[size] must be readable whenever 2*i + 1 is the
      * last index below size -- confirm against the callers.
      */
925 | static INLINE
926 | void
927 | tr_fixdown(const int *ISAd, int *SA, int i, int size) {
928 | int j, k;
929 | int v;
930 | int c, d, e;
931 |
      /* v is the entry being sifted and c its key; j is the left child of i. */
932 | for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
      /* k ends up as the child with the larger key, d as that key. */
933 | d = ISAd[SA[k = j++]];
934 | if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
935 | if(d <= c) { break; }
936 | }
937 | SA[i] = v;
938 | }
939 |
940 | /* Simple top-down heapsort.
      *
      * Sorts SA[0..size) in ascending order of the key ISAd[SA[.]] using
      * tr_fixdown. The heap is built over an odd number of entries m; when size
      * is even the last entry is kept outside the heap at first and swapped in
      * afterwards.
      */
941 | static
942 | void
943 | tr_heapsort(const int *ISAd, int *SA, int size) {
944 | int i, m;
945 | int t;
946 |
947 | m = size;
948 | if((size % 2) == 0) {
      /* Even size: shrink the heap by one and make sure the left-out entry
         SA[m] does not have a larger key than SA[m / 2]. */
949 | m--;
950 | if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
951 | }
952 |
      /* Build the heap bottom-up over SA[0..m). */
953 | for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
      /* Even size: exchange the heap root with the left-out entry and restore the heap. */
954 | if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
      /* Repeatedly move the root to the end of the shrinking heap. */
955 | for(i = m - 1; 0 < i; --i) {
956 | t = SA[0], SA[0] = SA[i];
957 | tr_fixdown(ISAd, SA, 0, i);
958 | SA[i] = t;
959 | }
960 | }
961 |
962 |
963 | /*---------------------------------------------------------------------------*/
964 |
965 | /* Picks, among v1, v2 and v3, the pointer whose ISAd key is the median. */
966 | static INLINE
967 | int *
968 | tr_median3(const int *ISAd, int *v1, int *v2, int *v3) {
969 |   int *t; /* not referenced directly; presumably the scratch SWAP expects */
970 |   /* Order the first pair so that key(v1) <= key(v2). */
971 |   if(ISAd[*v2] < ISAd[*v1]) { SWAP(v1, v2); }
972 |   /* v2 is the median unless v3 falls below it. */
973 |   if(ISAd[*v2] <= ISAd[*v3]) { return v2; }
974 |   /* key(v3) < key(v2): the median is whichever of v1 and v3 is larger. */
975 |   return (ISAd[*v1] > ISAd[*v3]) ? v1 : v3;
976 | }
977 |
978 | /* Returns the median of five elements.
      *
      * v1..v5 point at entries whose keys are ISAd[*vN]; the returned pointer is
      * one of the five arguments. Only the local pointer variables are
      * exchanged, the pointed-to entries are not modified. The step comments
      * below assume SWAP exchanges its two arguments (using the local t).
      */
979 | static INLINE
980 | int *
981 | tr_median5(const int *ISAd,
982 | int *v1, int *v2, int *v3, int *v4, int *v5) {
983 | int *t;
      /* Order the pairs (v2, v3) and (v4, v5). */
984 | if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
985 | if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
      /* Make (v2, v3) the pair with the smaller low element. */
986 | if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
      /* Bring v1 in: order (v1, v3), then repeat the pair comparison. */
987 | if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
988 | if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
      /* The result is the smaller of v3 and v4. */
989 | if(ISAd[*v3] > ISAd[*v4]) { return v4; }
990 | return v3;
991 | }
992 |
993 | /* Chooses the pivot for [first, last): median of 3 up to 32 elements,
994 |    median of 5 up to 512, otherwise the median of three medians of 3. */
995 | static INLINE
996 | int *
997 | tr_pivot(const int *ISAd, int *first, int *last) {
998 |   int len = last - first;
999 |   int *mid = first + len / 2;
1000 |   int *lo, *hi;
1001 |   int step;
1002 |
1003 |   if(len <= 32) { return tr_median3(ISAd, first, mid, last - 1); }
1004 |   if(len <= 512) {
1005 |     step = len >> 2;
1006 |     return tr_median5(ISAd, first, first + step, mid, last - 1 - step, last - 1);
1007 |   }
1008 |
1009 |   /* Large range: take three samples around each of the start, the middle
1010 |      and the end, spaced len/8 apart, and return the median of their medians. */
1011 |   step = len >> 3;
1012 |   lo = tr_median3(ISAd, first, first + step, first + (step << 1));
1013 |   mid = tr_median3(ISAd, mid - step, mid, mid + step);
1014 |   hi = tr_median3(ISAd, last - 1 - (step << 1), last - 1 - step, last - 1);
1015 |   return tr_median3(ISAd, lo, mid, hi);
1016 | }
1017 |
1018 |
1019 | /*---------------------------------------------------------------------------*/
1020 | /* Work budget consulted by tr_introsort through trbudget_check(). */
1021 | typedef struct _trbudget_t trbudget_t;
1022 | struct _trbudget_t {
1023 | int chance; /* how many more times trbudget_check may top `remain` up by `incval` */
1024 | int remain; /* budget still available; each accepted size is subtracted from it */
1025 | int incval; /* amount added to `remain` on a refill */
1026 | int count; /* sum of the sizes trbudget_check rejected; trsort zeroes it before each tr_introsort call */
1027 | };
1028 |
1029 | static INLINE
1030 | void
1031 | trbudget_init(trbudget_t *budget, int chance, int incval) {
1032 |   budget->incval = budget->remain = incval; /* start with one full allowance */
1033 |   budget->chance = chance; /* refills available; `count` is not set here */
1034 | }
1035 |
1036 | static INLINE
1037 | int
1038 | trbudget_check(trbudget_t *budget, int size) {
1039 |   /* Returns 1 when `size` is covered (spending a refill if needed), else 0. */
1040 |   if(budget->remain >= size) { budget->remain -= size; }
1041 |   else if(budget->chance != 0) { budget->chance -= 1; budget->remain += budget->incval - size; }
1042 |   else { budget->count += size; return 0; } /* out of refills: record the rejected size */
1043 |   return 1;
1044 | }
1045 |
1046 |
1047 | /*---------------------------------------------------------------------------*/
1048 | /* Three-way partition of [first, last) around the key value v, where the
       * key of an entry is ISAd[entry].
       *
       * Scanning starts at `middle`; the entries in [first, middle) are not
       * examined and are handled as if their key equalled v. Entries equal to v
       * are first collected at both ends of the range and swapped into the
       * centre at the end. On return [first, *pa) holds the keys below v,
       * [*pa, *pb) the keys equal to v and [*pb, last) the keys above v; if no
       * key differs from v, *pa == first and *pb == last.
       */
1049 | static INLINE
1050 | void
1051 | tr_partition(const int *ISAd,
1052 | int *first, int *middle, int *last,
1053 | int **pa, int **pb, int v) {
1054 | int *a, *b, *c, *d, *e, *f;
1055 | int t, s;
1056 | int x = 0;
1057 |
       /* From the left: skip the leading run equal to v; a marks its end. */
1058 | for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
1059 | if(((a = b) < last) && (x < v)) {
1060 | for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
1061 | if(x == v) { SWAP(*b, *a); ++a; }
1062 | }
1063 | }
       /* From the right: the mirror image, with d marking the equal run at the end. */
1064 | for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
1065 | if((b < (d = c)) && (x > v)) {
1066 | for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
1067 | if(x == v) { SWAP(*c, *d); --d; }
1068 | }
1069 | }
       /* Main loop: exchange a too-large left entry with a too-small right entry,
          then advance both scans, parking entries equal to v at the ends. */
1070 | for(; b < c;) {
1071 | SWAP(*b, *c);
1072 | for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
1073 | if(x == v) { SWAP(*b, *a); ++a; }
1074 | }
1075 | for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
1076 | if(x == v) { SWAP(*c, *d); --d; }
1077 | }
1078 | }
1079 |
       /* Swap the two equal runs from the ends into the centre. */
1080 | if(a <= d) {
1081 | c = b - 1;
1082 | if((s = a - first) > (t = b - a)) { s = t; }
1083 | for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
1084 | if((s = d - c) > (t = last - d - 1)) { s = t; }
1085 | for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
1086 | first += (b - a), last -= (d - c);
1087 | }
1088 | *pa = first, *pb = last;
1089 | }
1090 | /* Fills the middle partition [a, b) of the range [first, last) from the
       * order of the entries around it, and sets ISA for every entry placed.
       *
       * An entry *c contributes the value s = *c - depth when s is non-negative
       * and ISA[s] equals v = b - SA - 1 (the rank value shared by the middle
       * partition). The first loop scans forward from `first` and appends at the
       * front of the middle partition; its end bound d grows with each append,
       * so entries placed there are scanned too. The second loop scans backward
       * from last - 1 and fills downward from b until both fronts meet.
       */
1091 | static
1092 | void
1093 | tr_copy(int *ISA, const int *SA,
1094 | int *first, int *a, int *b, int *last,
1095 | int depth) {
1096 | /* sort suffixes of middle partition
1097 | by using sorted order of suffixes of left and right partition. */
1098 | int *c, *d, *e;
1099 | int s, v;
1100 |
1101 | v = b - SA - 1;
1102 | for(c = first, d = a - 1; c <= d; ++c) {
1103 | if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1104 | *++d = s;
1105 | ISA[s] = d - SA;
1106 | }
1107 | }
1108 | for(c = last - 1, e = d + 1, d = b; e < d; --c) {
1109 | if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1110 | *--d = s;
1111 | ISA[s] = d - SA;
1112 | }
1113 | }
1114 | }
1115 | /* Variant of tr_copy that does not give every placed entry its own rank.
       *
       * Entries are placed exactly as in tr_copy, but ISA[s] is set to `newrank`,
       * which only changes when ISA[s + depth] differs from that of the
       * previously placed entry; consecutive placed entries with the same
       * ISA[s + depth] therefore share one rank. Between the two placement loops
       * the ranks of the entries in [first, d] are rewritten by a backward scan
       * so that each run of equal ranks takes the index of its last member.
       */
1116 | static
1117 | void
1118 | tr_partialcopy(int *ISA, const int *SA,
1119 | int *first, int *a, int *b, int *last,
1120 | int depth) {
1121 | int *c, *d, *e;
1122 | int s, v;
1123 | int rank, lastrank, newrank = -1;
1124 |
1125 | v = b - SA - 1;
1126 | lastrank = -1;
       /* Forward pass: append matching entries at the front of the middle partition. */
1127 | for(c = first, d = a - 1; c <= d; ++c) {
1128 | if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1129 | *++d = s;
1130 | rank = ISA[s + depth];
1131 | if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
1132 | ISA[s] = newrank;
1133 | }
1134 | }
1135 |
1136 | lastrank = -1;
       /* Backward pass over [first, d]: re-label each run of equal ranks. */
1137 | for(e = d; first <= e; --e) {
1138 | rank = ISA[*e];
1139 | if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
1140 | if(newrank != rank) { ISA[*e] = newrank; }
1141 | }
1142 |
1143 | lastrank = -1;
       /* Backward pass from last - 1: fill the middle partition downward from b. */
1144 | for(c = last - 1, e = d + 1, d = b; e < d; --c) {
1145 | if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1146 | *--d = s;
1147 | rank = ISA[s + depth];
1148 | if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
1149 | ISA[s] = newrank;
1150 | }
1151 | }
1152 | }
1153 | /* Introsort over [first, last) keyed by ISAd[entry], writing the resulting
       * ranks into ISA. Pending sub-ranges are kept on an explicit stack
       * (STACK_PUSH5 / STACK_POP5, defined elsewhere in this file) instead of
       * being handled by recursion.
       *
       * `limit` doubles as a state selector:
       *   limit >= 0  remaining partitioning rounds before tr_heapsort is used;
       *   limit == -1 tandem repeat partition;
       *   limit == -2 tandem repeat copy (tr_copy / tr_partialcopy);
       *   otherwise   (-3 in this code) the range is already ordered by key and
       *               negative entries mark ties; ranks are updated and each tie
       *               group is scheduled for sorting with ISAd advanced by incr.
       *
       * Groups of equal keys are only sorted further when trbudget_check()
       * accepts their size; when it refuses, stack[trlink].d is set to -1 if
       * trlink is non-negative.
       * NOTE(review): the function has no explicit return, so STACK_POP5
       * presumably returns once the stack is empty -- confirm at its definition.
       */
1154 | static
1155 | void
1156 | tr_introsort(int *ISA, const int *ISAd,
1157 | int *SA, int *first, int *last,
1158 | trbudget_t *budget) {
1159 | #define STACK_SIZE TR_STACKSIZE
1160 | struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE];
1161 | int *a, *b, *c;
1162 | int t;
1163 | int v, x = 0;
1164 | int incr = ISAd - ISA;
1165 | int limit, next;
1166 | int ssize, trlink = -1;
1167 |
1168 | for(ssize = 0, limit = tr_ilg(last - first);;) {
1169 |
1170 | if(limit < 0) {
1171 | if(limit == -1) {
1172 | /* tandem repeat partition */
1173 | tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
1174 |
1175 | /* update ranks */
1176 | if(a < last) {
1177 | for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
1178 | }
1179 | if(b < last) {
1180 | for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
1181 | }
1182 |
1183 | /* push */
1184 | if(1 < (b - a)) {
1185 | STACK_PUSH5(NULL, a, b, 0, 0);
1186 | STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
1187 | trlink = ssize - 2;
1188 | }
1189 | if((a - first) <= (last - b)) {
1190 | if(1 < (a - first)) {
1191 | STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
1192 | last = a, limit = tr_ilg(a - first);
1193 | } else if(1 < (last - b)) {
1194 | first = b, limit = tr_ilg(last - b);
1195 | } else {
1196 | STACK_POP5(ISAd, first, last, limit, trlink);
1197 | }
1198 | } else {
1199 | if(1 < (last - b)) {
1200 | STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
1201 | first = b, limit = tr_ilg(last - b);
1202 | } else if(1 < (a - first)) {
1203 | last = a, limit = tr_ilg(a - first);
1204 | } else {
1205 | STACK_POP5(ISAd, first, last, limit, trlink);
1206 | }
1207 | }
1208 | } else if(limit == -2) {
1209 | /* tandem repeat copy */
       /* The entry below the -2 record holds the middle partition [a, b)
          pushed in the limit == -1 branch; its d field selects the copy routine. */
1210 | a = stack[--ssize].b, b = stack[ssize].c;
1211 | if(stack[ssize].d == 0) {
1212 | tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
1213 | } else {
1214 | if(0 <= trlink) { stack[trlink].d = -1; }
1215 | tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
1216 | }
1217 | STACK_POP5(ISAd, first, last, limit, trlink);
1218 | } else {
1219 | /* sorted partition */
       /* Leading non-negative entries are singletons: their rank is their index. */
1220 | if(0 <= *first) {
1221 | a = first;
1222 | do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
1223 | first = a;
1224 | }
1225 | if(first < last) {
       /* Undo the complement marks of the next tie group; it ends at the first
          non-negative entry, which belongs to the group as well. */
1226 | a = first; do { *a = ~*a; } while(*++a < 0);
1227 | next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
1228 | if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
1229 |
1230 | /* push */
1231 | if(trbudget_check(budget, a - first)) {
1232 | if((a - first) <= (last - a)) {
1233 | STACK_PUSH5(ISAd, a, last, -3, trlink);
1234 | ISAd += incr, last = a, limit = next;
1235 | } else {
1236 | if(1 < (last - a)) {
1237 | STACK_PUSH5(ISAd + incr, first, a, next, trlink);
1238 | first = a, limit = -3;
1239 | } else {
1240 | ISAd += incr, last = a, limit = next;
1241 | }
1242 | }
1243 | } else {
1244 | if(0 <= trlink) { stack[trlink].d = -1; }
1245 | if(1 < (last - a)) {
1246 | first = a, limit = -3;
1247 | } else {
1248 | STACK_POP5(ISAd, first, last, limit, trlink);
1249 | }
1250 | }
1251 | } else {
1252 | STACK_POP5(ISAd, first, last, limit, trlink);
1253 | }
1254 | }
1255 | continue;
1256 | }
1257 |
       /* Small range: insertion sort, then handle it as a sorted partition. */
1258 | if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
1259 | tr_insertionsort(ISAd, first, last);
1260 | limit = -3;
1261 | continue;
1262 | }
1263 |
       /* Partitioning rounds used up: heapsort, then complement every entry
          whose key equals that of its successor to mark the ties. */
1264 | if(limit-- == 0) {
1265 | tr_heapsort(ISAd, first, last - first);
1266 | for(a = last - 1; first < a; a = b) {
1267 | for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
1268 | }
1269 | limit = -3;
1270 | continue;
1271 | }
1272 |
1273 | /* choose pivot */
1274 | a = tr_pivot(ISAd, first, last);
1275 | SWAP(*first, *a);
1276 | v = ISAd[*first];
1277 |
1278 | /* partition */
1279 | tr_partition(ISAd, first, first + 1, last, &a, &b, v);
       /* (last - first) == (b - a) means every key in the range equals v. */
1280 | if((last - first) != (b - a)) {
1281 | next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
1282 |
1283 | /* update ranks */
1284 | for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
1285 | if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
1286 |
1287 | /* push */
       /* The branches below differ only in which of the three parts
          [first, a), [a, b), [b, last) is continued with and in which order the
          others are pushed; the choice depends on their relative sizes. */
1288 | if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
1289 | if((a - first) <= (last - b)) {
1290 | if((last - b) <= (b - a)) {
1291 | if(1 < (a - first)) {
1292 | STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1293 | STACK_PUSH5(ISAd, b, last, limit, trlink);
1294 | last = a;
1295 | } else if(1 < (last - b)) {
1296 | STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1297 | first = b;
1298 | } else {
1299 | ISAd += incr, first = a, last = b, limit = next;
1300 | }
1301 | } else if((a - first) <= (b - a)) {
1302 | if(1 < (a - first)) {
1303 | STACK_PUSH5(ISAd, b, last, limit, trlink);
1304 | STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1305 | last = a;
1306 | } else {
1307 | STACK_PUSH5(ISAd, b, last, limit, trlink);
1308 | ISAd += incr, first = a, last = b, limit = next;
1309 | }
1310 | } else {
1311 | STACK_PUSH5(ISAd, b, last, limit, trlink);
1312 | STACK_PUSH5(ISAd, first, a, limit, trlink);
1313 | ISAd += incr, first = a, last = b, limit = next;
1314 | }
1315 | } else {
1316 | if((a - first) <= (b - a)) {
1317 | if(1 < (last - b)) {
1318 | STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1319 | STACK_PUSH5(ISAd, first, a, limit, trlink);
1320 | first = b;
1321 | } else if(1 < (a - first)) {
1322 | STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1323 | last = a;
1324 | } else {
1325 | ISAd += incr, first = a, last = b, limit = next;
1326 | }
1327 | } else if((last - b) <= (b - a)) {
1328 | if(1 < (last - b)) {
1329 | STACK_PUSH5(ISAd, first, a, limit, trlink);
1330 | STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1331 | first = b;
1332 | } else {
1333 | STACK_PUSH5(ISAd, first, a, limit, trlink);
1334 | ISAd += incr, first = a, last = b, limit = next;
1335 | }
1336 | } else {
1337 | STACK_PUSH5(ISAd, first, a, limit, trlink);
1338 | STACK_PUSH5(ISAd, b, last, limit, trlink);
1339 | ISAd += incr, first = a, last = b, limit = next;
1340 | }
1341 | }
1342 | } else {
       /* The equal part is not sorted further (single entry or budget refused). */
1343 | if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
1344 | if((a - first) <= (last - b)) {
1345 | if(1 < (a - first)) {
1346 | STACK_PUSH5(ISAd, b, last, limit, trlink);
1347 | last = a;
1348 | } else if(1 < (last - b)) {
1349 | first = b;
1350 | } else {
1351 | STACK_POP5(ISAd, first, last, limit, trlink);
1352 | }
1353 | } else {
1354 | if(1 < (last - b)) {
1355 | STACK_PUSH5(ISAd, first, a, limit, trlink);
1356 | first = b;
1357 | } else if(1 < (a - first)) {
1358 | last = a;
1359 | } else {
1360 | STACK_POP5(ISAd, first, last, limit, trlink);
1361 | }
1362 | }
1363 | }
1364 | } else {
       /* All keys equal: retry the whole range with ISAd advanced by incr,
          budget permitting. */
1365 | if(trbudget_check(budget, last - first)) {
1366 | limit = tr_ilg(last - first), ISAd += incr;
1367 | } else {
1368 | if(0 <= trlink) { stack[trlink].d = -1; }
1369 | STACK_POP5(ISAd, first, last, limit, trlink);
1370 | }
1371 | }
1372 | }
1373 | #undef STACK_SIZE
1374 | }
1375 |
1376 |
1377 |
1378 | /*---------------------------------------------------------------------------*/
1379 |
1380 | /* Tandem repeat sort.
       *
       * Repeatedly walks SA[0..n) and calls tr_introsort on every group that is
       * not yet fully sorted, until a pass leaves nothing unsorted or *SA has
       * reached -n. ISAd starts at ISA + depth and its distance from ISA doubles
       * after every pass.
       *
       * A negative entry t at the start of a run in SA means "the next -t
       * entries are already sorted"; adjacent sorted runs are coalesced through
       * `skip` and written back as a single negative length.
       */
1381 | static
1382 | void
1383 | trsort(int *ISA, int *SA, int n, int depth) {
1384 | int *ISAd;
1385 | int *first, *last;
1386 | trbudget_t budget;
1387 | int t, skip, unsorted;
1388 |
1389 | trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
1390 | /* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
1391 | for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
1392 | first = SA;
1393 | skip = 0;
1394 | unsorted = 0;
1395 | do {
       /* Negative entry: jump over the sorted run and extend the pending skip. */
1396 | if((t = *first) < 0) { first -= t; skip += t; }
1397 | else {
       /* Flush the pending skip length at the start of the run it covers. */
1398 | if(skip != 0) { *(first + skip) = skip; skip = 0; }
       /* The group containing t ends at index ISA[t]. */
1399 | last = SA + ISA[t] + 1;
1400 | if(1 < (last - first)) {
1401 | budget.count = 0;
1402 | tr_introsort(ISA, ISAd, SA, first, last, &budget);
       /* A non-zero count means trbudget_check refused part of this group. */
1403 | if(budget.count != 0) { unsorted += budget.count; }
1404 | else { skip = first - last; }
1405 | } else if((last - first) == 1) {
1406 | skip = -1;
1407 | }
1408 | first = last;
1409 | }
1410 | } while(first < (SA + n));
1411 | if(skip != 0) { *(first + skip) = skip; }
1412 | if(unsorted == 0) { break; }
1413 | }
1414 | }
1415 |
1416 |
1417 | /*---------------------------------------------------------------------------*/
1418 |
1419 | /* Sorts suffixes of type B*.
       *
       * Classifies the suffixes of T[0..n) in one right-to-left scan, fills the
       * bucket counters, sorts the type B* suffixes (sssort on their substrings,
       * then trsort on the resulting ranks) and stores them at their final
       * positions in SA.
       *
       * bucket_A and bucket_B are not named in the body; they are presumably
       * reached through the BUCKET_A / BUCKET_B / BUCKET_BSTAR macros defined
       * elsewhere in this file.
       *
       * Returns m, the number of type B* suffixes.
       */
1420 | static
1421 | int
1422 | sort_typeBstar(const unsigned char *T, int *SA,
1423 | int *bucket_A, int *bucket_B,
1424 | int n) {
1425 | int *PAb, *ISAb, *buf;
1426 | #ifdef _OPENMP
1427 | int *curbuf;
1428 | int l;
1429 | #endif
1430 | int i, j, k, t, m, bufsize;
1431 | int c0, c1;
1432 | #ifdef _OPENMP
1433 | int d0, d1;
1434 | int tmp;
1435 | #endif
1436 |
1437 | /* Count the number of occurrences of the first one or two characters of each
1438 | type A, B and B* suffix. Moreover, store the beginning position of all
1439 | type B* suffixes into the array SA. */
1440 | for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
1441 | /* type A suffix. */
1442 | do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
1443 | if(0 <= i) {
1444 | /* type B* suffix. */
1445 | ++BUCKET_BSTAR(c0, c1);
1446 | SA[--m] = i;
1447 | /* type B suffix. */
1448 | for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
1449 | ++BUCKET_B(c0, c1);
1450 | }
1451 | }
1452 | }
       /* m was counted down from n, so n - m is the number of B* suffixes;
          their positions sit in SA[n - m .. n). */
1453 | m = n - m;
1454 | /*
1455 | note:
1456 | A type B* suffix is lexicographically smaller than a type B suffix that
1457 | begins with the same first two characters.
1458 | */
1459 |
1460 | /* Calculate the index of start/end point of each bucket. */
1461 | for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
1462 | t = i + BUCKET_A(c0);
1463 | BUCKET_A(c0) = i + j; /* start point */
1464 | i = t + BUCKET_B(c0, c0);
1465 | for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
1466 | j += BUCKET_BSTAR(c0, c1);
1467 | BUCKET_BSTAR(c0, c1) = j; /* end point */
1468 | i += BUCKET_B(c0, c1);
1469 | }
1470 | }
1471 |
1472 | if(0 < m) {
1473 | /* Sort the type B* suffixes by their first two characters. */
       /* PAb: the B* positions collected above; ISAb: rank array placed right
          after the first m slots of SA. */
1474 | PAb = SA + n - m; ISAb = SA + m;
1475 | for(i = m - 2; 0 <= i; --i) {
1476 | t = PAb[i], c0 = T[t], c1 = T[t + 1];
1477 | SA[--BUCKET_BSTAR(c0, c1)] = i;
1478 | }
       /* The last B* suffix (index m - 1) is placed after all others, so it
          ends up first in its bucket. */
1479 | t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
1480 | SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
1481 |
1482 | /* Sort the type B* substrings using sssort. */
1483 | #ifdef _OPENMP
1484 | tmp = omp_get_max_threads();
1485 | buf = SA + m, bufsize = (n - (2 * m)) / tmp;
1486 | c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
1487 | #pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp)
1488 | {
1489 | tmp = omp_get_thread_num();
1490 | curbuf = buf + tmp * bufsize;
1491 | k = 0;
1492 | for(;;) {
1493 | #pragma omp critical(sssort_lock)
1494 | {
1495 | if(0 < (l = j)) {
1496 | d0 = c0, d1 = c1;
1497 | do {
1498 | k = BUCKET_BSTAR(d0, d1);
1499 | if(--d1 <= d0) {
1500 | d1 = ALPHABET_SIZE - 1;
1501 | if(--d0 < 0) { break; }
1502 | }
1503 | } while(((l - k) <= 1) && (0 < (l = k)));
1504 | c0 = d0, c1 = d1, j = k;
1505 | }
1506 | }
1507 | if(l == 0) { break; }
1508 | sssort(T, PAb, SA + k, SA + l,
1509 | curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
1510 | }
1511 | }
1512 | #else
       /* The unused slots between the two m-sized areas serve as sssort's buffer. */
1513 | buf = SA + m, bufsize = n - (2 * m);
1514 | for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
1515 | for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
1516 | i = BUCKET_BSTAR(c0, c1);
1517 | if(1 < (j - i)) {
       /* lastsuffix is set when the bucket starts with the last B* suffix. */
1518 | sssort(T, PAb, SA + i, SA + j,
1519 | buf, bufsize, 2, n, *(SA + i) == (m - 1));
1520 | }
1521 | }
1522 | }
1523 | #endif
1524 |
1525 | /* Compute ranks of type B* substrings. */
1526 | for(i = m - 1; 0 <= i; --i) {
1527 | if(0 <= SA[i]) {
1528 | j = i;
1529 | do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
       /* Record the length of this run of unique entries as a negative number. */
1530 | SA[i + 1] = i - j;
1531 | if(i <= 0) { break; }
1532 | }
1533 | j = i;
1534 | do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
1535 | ISAb[SA[i]] = j;
1536 | }
1537 |
1538 | /* Construct the inverse suffix array of type B* suffixes using trsort. */
1539 | trsort(ISAb, SA, m, 1);
1540 |
1541 | /* Set the sorted order of type B* suffixes. */
1542 | for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
1543 | for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
1544 | if(0 <= i) {
1545 | t = i;
1546 | for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
1547 | SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
1548 | }
1549 | }
1550 |
1551 | /* Calculate the index of start/end point of each bucket. */
1552 | BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
1553 | for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
1554 | i = BUCKET_A(c0 + 1) - 1;
1555 | for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
1556 | t = i - BUCKET_B(c0, c1);
1557 | BUCKET_B(c0, c1) = i; /* end point */
1558 |
1559 | /* Move all type B* suffixes to the correct position. */
1560 | for(i = t, j = BUCKET_BSTAR(c0, c1);
1561 | j <= k;
1562 | --i, --k) { SA[i] = SA[k]; }
1563 | }
1564 | BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
1565 | BUCKET_B(c0, c0) = i; /* end point */
1566 | }
1567 | }
1568 |
1569 | return m;
1570 | }
1571 |
1572 | /* Constructs the suffix array by using the sorted order of type B* suffixes.
       *
       * T is the text of length n, SA the array being completed and m the number
       * of type B* suffixes (the right-to-left pass is skipped when m is 0).
       * bucket_A and bucket_B are not named in the body; they are presumably
       * reached through the BUCKET_A / BUCKET_B / BUCKET_BSTAR macros defined
       * elsewhere in this file.
       *
       * During both scans a stored value is complemented to flag an entry that
       * must not induce a further suffix in that scan; the left-to-right scan
       * turns such entries back into plain indices.
       */
1573 | static
1574 | void
1575 | construct_SA(const unsigned char *T, int *SA,
1576 | int *bucket_A, int *bucket_B,
1577 | int n, int m) {
1578 | int *i, *j, *k;
1579 | int s;
1580 | int c0, c1, c2;
1581 |
1582 | if(0 < m) {
1583 | /* Construct the sorted order of type B suffixes by using
1584 | the sorted order of type B* suffixes. */
1585 | for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
1586 | /* Scan the suffix array from right to left. */
1587 | for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
1588 | j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
1589 | i <= j;
1590 | --j) {
1591 | if(0 < (s = *j)) {
1592 | *j = ~s;
       /* s now indexes the preceding suffix and c0 is its first character. */
1593 | c0 = T[--s];
1594 | if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
       /* k is the write cursor of bucket (c2, c1); save it back when the
          leading character changes. */
1595 | if(c0 != c2) {
1596 | if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1597 | k = SA + BUCKET_B(c2 = c0, c1);
1598 | }
1599 | *k-- = s;
1600 | } else*j = ~s;
1601 | }
1602 | }
1603 | }
1604 |
1605 | /* Construct the suffix array by using
1606 | the sorted order of type B suffixes. */
       /* Seed the scan with the last suffix, n - 1, at the start of its bucket. */
1607 | k = SA + BUCKET_A(c2 = T[n - 1]);
1608 | *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
1609 | /* Scan the suffix array from left to right. */
1610 | for(i = SA, j = SA + n; i < j; ++i) {
1611 | if(0 < (s = *i)) {
1612 | c0 = T[--s];
1613 | if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
1614 | if(c0 != c2) {
1615 | BUCKET_A(c2) = k - SA;
1616 | k = SA + BUCKET_A(c2 = c0);
1617 | }
1618 | *k++ = s;
1619 | } else*i = ~s;
1620 | }
1621 | }
1622 | /*---------------------------------------------------------------------------*/
1623 |
1624 | /*- Function -*/
1625 |
1626 | int
1627 | divsufsort(const unsigned char *T, int *SA, int *bucket, int n) {
1628 | int *bucket_A=bucket, *bucket_B=bucket+BUCKET_A_SIZE;
1629 | int m;
1630 | int err = 0;
1631 |
1632 | /* Check arguments. */
1633 | if((T == NULL) || (SA == NULL) || (n < 0))return-1;
1634 | if(n == 0)return 0;
1635 | if(n == 1)return SA[0]=0;
1636 | if(n == 2){SA[m=T[0]
2 |
3 | void e8(uint8_t *in_buf, int32_t n) {
4 | int32_t i;
5 | int32_t *op;
6 | for(i=0; i= -i && *op < n-i) {
17 | *op += i;
18 | } else if ( *op >= n-i && *op < n ) {
19 | *op -= n; // to [-i,1]
20 | }
21 | i+=4;
22 | }
23 | }
24 | }
25 |
26 | void e8back(uint8_t *buf,int32_t n) {
27 | int32_t i;
28 | int32_t *op;
29 | for(i=0; i= -i && *op < 0) {
41 | *op += n;
42 | } else if ( *op >= 0 && *op < n ) {
43 | *op -= i;
44 | }
45 | i+=4;
46 | }
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/lzoma.h:
--------------------------------------------------------------------------------
1 | #define AuthorID 0xA1Ef
2 | #define AlgoID "LZOM"
3 | #define Version 0x00
4 |
5 | #define BLOCK_STORED 0x80000000
6 | #define BLOCK_LAST 0x40000000
7 |
8 | #define HISTORY_SIZE(dict_size) (32*1024<> 4)
10 |
11 | #define longlen 5400
12 | #define hugelen 0x060000
13 | #define breaklz 512
14 | #define lzmagic (0x002FFe00*2) /* parenthesized so the product survives any surrounding operator */
15 | #define lzshift(top) ((9*(top))>>3) /* 9/8 of top, rounded down; argument parenthesized */
16 |
17 | #define lzlow(total) (((total) <= 400000) ? 60 :50) /* 60 for totals up to 400000, else 50 */
18 |
19 |
--------------------------------------------------------------------------------
/pack.c:
--------------------------------------------------------------------------------
1 | // test file compression based on lzoma algorithm
2 | // (c) 2015,2016 Alexandr Efimov
3 | // License: GPL 2.0 or later
4 | // Uses divsufsort library for faster initialization (thanks to xezz for suggestion), see divsufsort.h for its license details.
5 | //
6 | // Discussion thread: http://encode.ru/threads/2280-LZOMA
7 | //
8 | // Notes:
9 | //
10 | // Pros:
11 | // Compression ratio is very good (much higher than lzo, ucl, gzip).
12 | // Decompression speed is very high (faster than gzip, much faster than bzip2,lzham, lzma,xz)
13 | // tiny decompressor code (asm version of decompress function less than 400 bytes)
14 | //
15 | // compressed data format is somewhere between lzo and lzma
16 | // uses static encoding and byte-aligned literals and byte-aligned parts of match offset for decompression speed
17 | //
18 | // Cons:
19 | // compressor is VERY slow. It is possible to implement faster compressor at the cost of some compression ratio.
20 | // may be it is possible to adapt lzma compressor code.
21 | //
22 | // Other:
23 | // Code of both compression/decompression utils is experimental.
24 | // compressed data format is not stable yet.
25 | // compressor source code is more like a ground for experiments, not a finished product yet.
26 | // some commented out code was intended for experiments with Reduced-offset LZ, RLE step before LZ, LZX-style encoding of matches, various heuristics, etc.
27 | //
28 | #include
29 | #include
30 | #include"divsufsort.h"
31 |
32 | #include "lzoma.h"
33 | #include "bpe.h"
34 | #include "e8.h"
35 |
36 | #define MINOLEN 1
37 | #define MINLZ 2
38 |
39 | int level, short_match_level, match_level;
   /* Nine presets of three ints each.
      NOTE(review): presumably one row per compression level, supplying the
      three variables declared above -- confirm against the code that reads
      this table, which is outside this excerpt. */
40 | int levels[9][3]= {
41 | {1,1,2},
42 | {1,2,3},
43 | {2,3,5},
44 | {3,5,15},
45 | {3,7,30},
46 | {3,10,100},
47 | {3,20,200}, // default level -7
48 | {3,40,500},
49 | {3,100,1000}
50 | };
51 | int verbose = 0;
52 |
53 | int dict_size, history_size, block_size;
54 |
55 | FILE *flzlit=NULL;
56 | FILE *flit=NULL;
57 | FILE *folz=NULL;
58 | FILE *flen=NULL;
59 | FILE *fdist=NULL;
60 | #ifdef EXPERIMENTS
61 | // this is for some experimentation only.
62 | FILE *test=NULL;
63 | FILE *test2=NULL;
64 | FILE *test3=NULL;
65 | #endif
66 |
67 | uint32_t *rle;
68 | uint8_t *in_buf; /* text to be encoded */
69 | //uint8_t *out_buf; - shared with rle
70 | #define out_buf ((uint8_t *)rle)
71 |
72 | typedef struct { /* one record per input position; reached through the cache(i) ... olz_len2(i) macros, which index `state` by i - in_offset */
73 | int32_t cache; /* best possible result in bits if we start with lz or letter code */
74 | int32_t best_ofs; /* best way to start, assuming we do not start with OLD OFFSET code */
75 | int32_t best_len; /* best way to start - match len, assuming we do not start with OLD OFFSET code */
76 | int32_t use_olz; /* if not zero, repeat same offset after this number of literals */
77 | int32_t olz_len; /* length of repeated lz */
78 | int32_t use_olz2; /* if not zero, repeat same offset after this number of literals after first repeat */
79 | int32_t olz_len2; /* length of repeated lz */
80 | } FutureState;
81 |
82 | typedef struct { /* one record per input position; reached through the same(i) ... sorted_next(i) macros, which index `past_state` by i */
83 | int32_t same; /* pointer to previous match of at least 2 bytes. for checking nearby short matches */
84 | int32_t samelen; /* length of match between this and previous string pointed by "same" */
85 |
86 | // sorted tree in order to quickly check long matches starting from the longest match
87 | int32_t sorted_len;
88 | int32_t sorted_prev;
89 | int32_t sorted_next;
90 | } PastState;
91 |
92 | // PastState and FutureState share the same memory buffer
93 | // sizeof(PastState) should be < sizeof(FutureState)
94 | void *state;
95 | void *past_state;
96 |
97 | #define sorted ((int32_t *)((uint8_t *)state)) // used very early in initialization
98 |
99 | #define cache(i) ((FutureState *)state)[i-in_offset].cache
100 | #define best_ofs(i) ((FutureState *)state)[i-in_offset].best_ofs
101 | #define best_len(i) ((FutureState *)state)[i-in_offset].best_len
102 | #define use_olz(i) ((FutureState *)state)[i-in_offset].use_olz
103 | #define olz_len(i) ((FutureState *)state)[i-in_offset].olz_len
104 | #define use_olz2(i) ((FutureState *)state)[i-in_offset].use_olz2
105 | #define olz_len2(i) ((FutureState *)state)[i-in_offset].olz_len2
106 |
107 | #define same(i) (((PastState *)past_state)[i].same)
108 | #define samelen(i) (((PastState *)past_state)[i].samelen)
109 | #define sorted_len(i) (((PastState *)past_state)[i].sorted_len)
110 | #define sorted_prev(i) (((PastState *)past_state)[i].sorted_prev)
111 | #define sorted_next(i) (((PastState *)past_state)[i].sorted_next)
112 |
113 | #ifdef _MSC_VER
114 | #include
115 | uint32_t __inline __builtin_clz( uint32_t value )
116 | {
117 |   /* MSVC stand-in for the GCC/Clang builtin. _BitScanReverse is declared as
118 |      taking `unsigned long *`, so the index must be an unsigned long rather
119 |      than a uint32_t to avoid passing an incompatible pointer type. */
120 |   unsigned long msb_index = 0;
121 |
122 |   if ( _BitScanReverse( &msb_index, value ) )
123 |   {
124 |     return 31 - (uint32_t)msb_index;
125 |   }
126 |   return 32; /* value == 0: no bit set (the real builtin is undefined here) */
127 | }
128 | #endif
129 |
130 | int in_offset = 0;
131 |
132 | static inline int price_offset(int num,int total) {
133 | if (total<=256) return 8;//top=0;
134 | register int res=8;
135 | register int x=256;
136 |
137 | int top = lzlow(total);
138 | while (1) {
139 | x+=x;
140 | if (x>=total+top) break; /* only 1 bit to be outputted left */
141 | if (x & lzmagic)
142 | top=lzshift(top);
143 | //if (x>=breaklz) {
144 | if (num=x-total) { res++;}
152 | return res;
153 | }
154 |
#define REPLEN_SKEW 1
/*
 * Price, in bits, of coding the length of a repeated-offset match.
 * num is expected to be >= 2. Values below 4 use the shortest codes (2 bits);
 * above that the price grows by two bits per extra significant bit of num-2.
 */
static inline int price_replen(int num) {
  if (num >= 4) {
    const int top_bit = 31 - __builtin_clz(num - 2); /* index of highest set bit */
    return REPLEN_SKEW + 2 * top_bit;
  }
  return 2; /* 00 01 */
}
161 |
#define LEN_SKEW 1
/*
 * Price, in bits, of coding a match length.
 * num is expected to be >= 2. Values below 4 use the shortest codes (2 bits);
 * above that the price grows by two bits per extra significant bit of num-2.
 */
static inline int price_len(int num) {
  if (num >= 4) {
    const int top_bit = 31 - __builtin_clz(num - 2); /* index of highest set bit */
    return LEN_SKEW + 2 * top_bit;
  }
  return 2; /* 00 01 10 */
}
168 |
169 | int lastpos;
170 | unsigned int bit_cnt;
171 | int outpos;
172 |
173 | static inline void putbit(int bit) {
174 | bit_cnt>>=1;
175 | if (bit_cnt==0) {
176 | lastpos=outpos;
177 | *(unsigned long*)(out_buf+lastpos)=0;
178 | outpos+=4;
179 | bit_cnt=0x80000000;
180 | }
181 | if (bit) *(unsigned long *)(out_buf+lastpos)|=bit_cnt;
182 | }
183 |
184 | int stlet=0;
185 | int stlz=0;
186 | int stolz=0;
187 | int bitslzlen=0;
188 | int bitsolzlen=0;
189 | int bitslen=0;
190 | int bitsdist=0;
191 | int bitslit=0;
192 |
193 | static inline void putenc(int num,int total, int break_at, int debug) {
194 | char bits[100];
195 | int res=0;
196 | int x=1;
197 | int obyte=0;
198 | if (fdist) fwrite(&num,1,4,fdist);
199 | obyte=1;
200 | bits[0]=0;
201 | bits[1]=0;
202 | bits[2]=0;
203 | bits[3]=0;
204 | bits[4]=0;
205 | bits[5]=0;
206 | bits[6]=0;
207 | bits[7]=0;
208 | //if (debug) fprintf(stderr,"ofs=%d total=%d\n",num,total);
209 |
210 | int top=lzlow(total);
211 | //if (total<=256) top=0;
212 | while (1) {
213 | x+=x;
214 | if (x>=512&& x>=total+top) break; /* only 1 bit to be outputted left */
215 | if (x & lzmagic)
216 | top=lzshift(top);
217 | if (x>=break_at) {
218 | if (num=x) {
227 | num+=x;
228 | bits[res++]=2;
229 | }
230 |
231 | doneit:
232 | for(;res<8;res++) {
233 | bits[res++]=2;
234 | }
235 | for(x=res-1;x>=0;x--) {
236 | if (bits[x]==2) {
237 | bits[x]=num&1;
238 | num>>=1;
239 | }
240 | }
241 | if (obyte) {
242 | //printf("res=%d\n", res);
243 | uint8_t b=0;
244 | for(x=0;x<8;x++) {
245 | if (debug) printf("%d",bits[x]);
246 | if (bits[x]) b|=128>>x;
247 | }
248 | if (debug) printf(" ");
249 | if (!debug) out_buf[outpos++]=b;
250 | for(;x>1)) {bits[res++]=0; break;}
279 | bits[res++]=1;
280 | num-=x>>1;
281 | bits[res++]=2;
282 | }
283 |
284 | for(x=res-1;x>=0;x--) {
285 | if (bits[x]==2) {
286 | bits[x]=num&1;
287 | num>>=1;
288 | }
289 | }
290 | for(x=0;xb? a:b;
315 | }
316 |
317 | static inline void put_lz(int offset,int length,int used) { // emit one match into the output stream; offset is passed negative, used is forwarded to putenc as the range of the offset code
318 | #ifdef EXPERIMENTS
319 | uint16_t code512 = 0x100;
320 | #endif
321 |
322 | if (flzlit) fprintf(flzlit,"%c",1); // optional statistics stream: 1 = match
323 | putbit(1); bitslzlen++; // flag bit 1 = match (put_letter emits 0)
324 | offset=-offset; /* 1.. */
325 | offset--; /* 0.. */
326 | if (was_letter) { bitsolzlen++; // only right after a literal is the extra "reuse offset" flag written
327 | was_letter=0;
328 | if (old_ofs==offset) { // same offset as the previous match: flag 0 and the length only
329 | stolz++;
330 | if (folz) fprintf(folz,"%c",0);
331 |
332 | #ifdef EXPERIMENTS
333 | // test combining everything into one model for simple entropy coding
334 | code512 |= 0x80;
335 | if (length-MINOLEN < 0x7F) {
336 | code512 |= length-MINOLEN;
337 | } else {
338 | code512 |= 0x3F; // NOTE(review): threshold above is 0x7F but the escape value is 0x3F - confirm intended
339 | length-=MINOLEN+0x7F;
340 | fwrite(&length, 4, 1, test2);
341 | length+=MINOLEN+0x7F;
342 | }
343 | code512 = (code512 & 0xFF) << 8 | (code512>>8); // swap the two bytes before writing
344 | fwrite(&code512, 2, 1, test);
345 | #endif
346 |
347 | putbit(0);
348 | putenc_l(length-MINOLEN);
349 | return; // old_ofs stays unchanged on this path
350 | }
351 | if (folz) fprintf(folz,"%c",1);
352 | putbit(1); // flag 1: a freshly coded offset follows
353 | }
354 | length-=MINLZ;
355 | stlz++;
356 | if (offset+1>=longlen) { length--; } // the decoder adds 1 to the length for offsets >= longlen ...
357 | if (offset+1>=hugelen) { length--; } // ... and 1 more for offsets >= hugelen
358 |
359 | #ifdef EXPERIMENTS
360 | if (length < 15) {
361 | code512 |= length;
362 | } else {
363 | code512 |= 15;
364 | length-=15;
365 | fwrite(&length, 4, 1, test2);
366 | length+=15;
367 | }
368 | code512 |= (offset & 0x7) << 4;
369 | code512 = (code512 & 0xFF) << 8 | (code512>>8); // swap the two bytes before writing
370 | fwrite(&code512, 2, 1, test);
371 | uint tmpofs = offset >> 4;
372 | fwrite(&tmpofs, 4, 1, test3);
373 | #endif
374 |
375 | putenc(offset,used,breaklz, 0);
376 | putenc_l(length-MINLZ+2);
377 |
378 | old_ofs=offset; // remembered for the reuse test of the next match
379 | }
380 |
381 | static inline void put_letter(uint8_t b) {
382 | #ifdef EXPERIMENTS
383 | uint16_t code512 = b;
384 | code512 = (code512 & 0xFF) << 8 | (code512>>8);
385 | fwrite(&code512, 2, 1, test);
386 | #endif
387 |
388 | if (flzlit) fprintf(flzlit,"%c",0);
389 | if (flit) fprintf(flit,"%c",b);
390 | putbit(0); bitslzlen++;
391 | out_buf[outpos++]=b; bitslit+=8;
392 | was_letter++;
393 | stlet++;
394 | }
395 |
396 | static inline int price_lz(int offset, int length, int used) { // offset>=1, length>=2,
397 | // if offset=>0xD00 length>=3
398 | int res=1; /* 1 bit = not a letter */
399 | if (offset>=longlen) { length--; }
400 | if (offset>=hugelen) { length--; }
401 |
402 | offset--; // 0..
403 |
404 | res+=price_offset(offset,used);
405 | res+=price_len(length-MINLZ+2);
406 | return res;
407 | }
408 |
409 | static inline int price_lzlen(int offset, int length, int used) { // offset>=1, length>=2,
410 | // if offset=>0xD00 length>=3
411 | int res=1; /* 1 bit = not a letter */
412 | if (offset>=longlen) { length--; }
413 | if (offset>=hugelen) { length--; }
414 |
415 | res+=price_len(length-MINLZ+2);
416 | return res;
417 | }
418 |
419 | static inline int price_replz_minus_lz(int offset, int length, int used) { // offset>=1, length>=2,
420 | // if offset=>0xD00 length>=3
421 | int res=2 /* lzlit flag, replz flag */ +price_replen(length+2-MINOLEN);
422 | return res-price_lz(offset,length,used);
423 | }
424 |
425 | static inline int cmpstr(int src,int src2) {
426 | int res=0;
427 | int b;
428 |
429 | for(;;) {
430 | if (in_buf[src]!=in_buf[src2]) return res;
431 | b=rle[src2];
432 | if (!b) return res;
433 | if (b>rle[src]) {return res+rle[src];}
434 | res+=b;
435 | src+=b;
436 | src2+=b;
437 | }
438 | return res;
439 | }
440 |
441 | int cmpstrsort(int *psrc,int *psrc2) {
442 | int b;
443 | int src = *psrc;
444 | int src2 = *psrc2;
445 | // printf("%d:%d:%d\n",src,src2,left);
446 | do {
447 | if (in_buf[src]in_buf[src2]) return 1;
449 | b=rle[src2];
450 | if (!b) return 1; // first string is longer
451 | if (b>rle[src]) b=rle[src];
452 | if (!b) return -1; // second string is longer
453 | src+=b;
454 | src2+=b;
455 | } while(1);
456 | }
457 |
458 | void init_same(int start, int n) {
459 | int i;
460 | uint16_t bb;
461 | int run_len;
462 | int gen_same[256*256+256];
463 |
464 | /*
465 | Notes: the slowest parts here are PLCP array construction and divsufsort.
466 | On slower levels -7 .. -9 it does not matter.
467 | But on fast levels -1..-3 (that still provide good compression),
468 | initialization takes about 20-30% processing time.
469 |
470 | Also, it reprocesses whole history each time a new block is read,
471 | which is clearly not optimal.
472 |
473 | possible optimizations:
474 | 1. store SA for later reuse, do divsufsort for new block only, then
475 | merge them. not sure if it will be faster. still need to recalculate rlcp
476 | 2. get rid of SA completely, construct suffix tree directly.
477 | */
478 | for(i=0;i<256+256*256;i++) gen_same[i] =0; // for bucketA & bucketB
479 | divsufsort(in_buf,sorted,gen_same,n);
480 | // reuse sorted_prev for temp buffer
481 | #define rank(i) rle[i]
482 | /*
483 | calculate plcp in O(n) time
484 | see http://www.cs.ucr.edu/~stelo/cpm/cpm09/04_karkk.pdf
485 | http://www.mi.fu-berlin.de/wiki/pub/ABI/Sequence_analysi_2013/2004_ManziniTwo_Space_Saving_Tricks_for_Linear_Time_LCP_Array_Computation.pdf
486 | */
487 | for(i=1;i<=n-1;i++) rank(sorted[i]) = sorted[i-1];
488 | rank(sorted[0]) = sorted[n-1];
489 |
490 | sorted_prev(sorted[0])=-1;
491 | for(i=1;i0) h--;
506 | }
507 |
508 | rle[n] = run_len = 0;
509 | uint8_t b = in_buf[n-1];
510 | for(i=n-1;i>=0;i--) {
511 | if (in_buf[i]==b)
512 | run_len++;
513 | else {
514 | b=in_buf[i];
515 | run_len = 1;
516 | }
517 | rle[i]=run_len;
518 | }
519 |
520 | bb=0;
521 |
522 | for(i=0;i<65536;i++) {gen_same[i]=-1; }
523 | for(i=0;i=0) { samelen(i)=1+cmpstr(i+1,same(i)+1);}
527 | gen_same[bb]=i;
528 | }
529 | same(i)=-1;
530 |
531 | in_buf[n]=0;
532 |
533 | if (verbose) printf("init done.\n");
534 | }
535 |
536 | #define CHECK_REPLZ \
537 | int k;\
538 | int jjj;\
539 | int d=level;\
540 | int tmp=pofs+price_lzlen(used-pos,len,used);\
541 | int olen=0;\
542 | for(k=len+1;k=MINOLEN) {\
572 | int tmp2=tmp+2+price_replen(olen+2-MINOLEN);\
573 | tmp2+=cache(used+k+olen);\
574 | if (best_len(used+k+olen)==1) {\
575 | int jj;\
576 | for(jj=1;jj<=8;jj++) {\
577 | if (best_len(used+k+olen+jj)>1) {\
578 | if (best_ofs(used+k+olen+jj)==pos-used) {\
579 | tmp2+=price_replz_minus_lz(used-pos,best_len(used+k+olen+jj),used+k+olen+jj);\
580 | break;\
581 | }\
582 | }\
583 | int olen2=cmpstr(used+k+olen+jj,pos+k+olen+jj);\
584 | for (jjj=MINOLEN;jjj<=olen2;jjj++) {\
585 | /* if (olen2>=MINOLEN) {*/\
586 | int tmp3=-cache(used+k+olen);\
587 | tmp3+=jj*9+2+price_replen(jjj+2-MINOLEN);\
588 | tmp3+=cache(used+k+olen+jj+jjj);\
589 | if (tmp3<0) { tmp3+=tmp2;\
590 | if (tmp3=0) {
634 | sorted_next(sorted_prev(n-1))=sorted_next(n-1);
635 | }
636 | if (sorted_next(n-1)>=0) {
637 | sorted_len(sorted_next(n-1)) = Min(sorted_len(sorted_next(n-1)),
638 | sorted_len(n-1));
639 | sorted_prev(sorted_next(n-1))=sorted_prev(n-1);
640 | }
641 |
642 | for(i=n-2;i>=start;i--) {
643 | int used=i;
644 | int left=n-i;
645 | int res;
646 | int pos;
647 | int max_match;
648 | int len;
649 | int j;
650 |
651 | int my_best_ofs=0;
652 | int my_best_len=1;
653 | int my_use_olz=0;
654 | int my_use_olz2=0;
655 | int my_olz_len=0;
656 | int my_olz_len2=0;
657 | int match_check_max;
658 | int notskip = 1;
659 |
660 | res=9+cache(used+1);
661 | if (best_ofs(used+1)) {
662 | res++;
663 | if (in_buf[used]==in_buf[used+1]) {
664 | if (in_buf[used]==in_buf[used-1]) {
665 | if (in_buf[used]==in_buf[used+best_ofs(used+1)]) {
666 | if ((best_len(used+1)>3)||(best_len(used+1)==3&&-best_ofs(used+1)=5)
679 | notskip = 0;
680 | }
681 | }
682 | }
683 | }
684 | }
685 |
686 | int k;
687 | for(k=1;k<4;k++)
688 | if (n-i>2+k && best_ofs(used+2+k)
689 | && used+best_ofs(used+2+k) >= 0
690 | && -best_ofs(used+2+k) < longlen && best_ofs(used+2+k)!=best_ofs(used+1+k)) {
691 | if (in_buf[used]==in_buf[used+best_ofs(used+2+k)]) {
692 | if (in_buf[used+1]==in_buf[used+1+best_ofs(used+2+k)]) {
693 | int tmp=cache(used+2+k)+price_replz_minus_lz(-best_ofs(used+2+k),best_len(used+2+k),used+2+k)
694 | +9*k+price_lz(-best_ofs(used+2+k),2,used);
695 | if (tmp<=res) {
696 | res=tmp;
697 | my_best_ofs=best_ofs(used+2+k);
698 | my_best_len=2;
699 | my_use_olz=k;
700 | my_olz_len=best_len(used+2+k);
701 | my_use_olz2=use_olz(used+2+k);
702 | my_olz_len2=olz_len(used+2+k);
703 | }
704 | }
705 | }
706 | }
707 |
708 | for(k=1;k<4;k++)
709 | if (n-i>3+k && best_ofs(used+3+k)
710 | && used+best_ofs(used+3+k) >= 0
711 | && -best_ofs(used+3+k) < hugelen && best_ofs(used+3+k)!=best_ofs(used+2+k)) {
712 | if (in_buf[used]==in_buf[used+best_ofs(used+3+k)]) {
713 | if (in_buf[used+1]==in_buf[used+1+best_ofs(used+3+k)]) {
714 | if (in_buf[used+2]==in_buf[used+2+best_ofs(used+3+k)]) {
715 | int tmp=cache(used+3+k)+price_replz_minus_lz(-best_ofs(used+3+k),best_len(used+3+k),used+3+k)
716 | +9*k+price_lz(-best_ofs(used+3+k),3,used);
717 | if (tmp<=res) {
718 | res=tmp;
719 | my_best_ofs=best_ofs(used+3+k);
720 | my_best_len=3;
721 | my_use_olz=k;
722 | my_olz_len=best_len(used+3+k);
723 | my_use_olz2=use_olz(used+3+k);
724 | my_olz_len2=olz_len(used+3+k);
725 | }
726 | }
727 | }
728 | }
729 | }
730 |
731 | for(k=1;k<4;k++)
732 | if (n-i>4+k && best_ofs(used+4+k)
733 | && used+best_ofs(used+4+k) >= 0
734 | && best_ofs(used+4+k)!=best_ofs(used+3+k)) {
735 | if (in_buf[used]==in_buf[used+best_ofs(used+4+k)]) {
736 | if (in_buf[used+1]==in_buf[used+1+best_ofs(used+4+k)]) {
737 | if (in_buf[used+2]==in_buf[used+2+best_ofs(used+4+k)]) {
738 | if (in_buf[used+3]==in_buf[used+3+best_ofs(used+4+k)]) {
739 | int tmp=cache(used+4+k)+price_replz_minus_lz(-best_ofs(used+4+k),best_len(used+4+k),used+4+k)
740 | +9*k+price_lz(-best_ofs(used+4+k),4,used);
741 | if (tmp<=res) {
742 | res=tmp;
743 | my_best_ofs=best_ofs(used+4+k);
744 | my_best_len=4;
745 | my_use_olz=k;
746 | my_olz_len=best_len(used+4+k);
747 | my_use_olz2=use_olz(used+4+k);
748 | my_olz_len2=olz_len(used+4+k);
749 | }
750 | }
751 | }
752 | }
753 | }
754 | }
755 | pos=same(used);
756 | if (pos<0) goto done;
757 | if (!notskip) goto done;
758 |
759 | {
760 | len=samelen(used);
761 | int ll=(used-pos>=longlen)?1:0;
762 | if (used-pos>=hugelen) ll=2;
763 | int pofs = price_offset(used-pos-1,used);
764 | if (len=2+ll) {
765 | CHECK_REPLZ
766 | }
767 | for(j=MINLZ+ll;j<=len;j++) {
768 | int tmp=pofs+price_lzlen(used-pos,2-MINLZ+j,used);
769 | tmp+=cache(used+j);
770 | if (tmp=longlen)?1:0;
787 | if (used-pos>=hugelen) ll=2;
788 | //if (used-pos>=longlen) break;
789 | if (pos<0) break;
790 | if (len>slen) {
791 | len=slen;
792 | } else if (len==slen) {
793 | len+=cmpstr(used+len,pos+len);
794 | }
795 | int pofs = price_offset(used-pos-1,used);
796 | if (len=2+ll) {
797 | CHECK_REPLZ
798 | }
799 | if (len>max_match) {
800 | for(j=Max(max_match+1,MINLZ+ll);j<=len;j++) {
801 | int tmp=pofs+price_lzlen(used-pos,j-MINLZ+2,used);
802 | tmp+=cache(used+j);
803 | if (tmp= 0 ? sorted_len(used) : 0;
819 | int len_bottom=bottom >= 0 ? sorted_len(bottom) : 0;
820 |
821 | match_check_max = match_level;
822 | int my_min_ofs=used+1;
823 | while (top>=0 || bottom >=0) {
824 | match_check_max--;
825 | if (match_check_max<=0) goto done;
826 | if (len_top>len_bottom) {
827 | pos=top;
828 | len=len_top;
829 | len_top = Min(len_top,top >= 0 ? sorted_len(top):0);
830 | top=sorted_prev(pos);
831 | } else {
832 | pos=bottom;
833 | len=len_bottom;
834 | bottom=sorted_next(pos);
835 | len_bottom = Min(len_bottom,bottom >= 0 ? sorted_len(bottom):0);
836 | }
837 | if (len<=MINLZ) goto done;
838 | if (len<=MINLZ+1 && used-pos>=hugelen) continue; //
839 | int pofs = price_offset(used-pos-1,used);
840 | if (lenused-pos) {
844 | my_min_ofs=used-pos;//we are checking matches in decreasing order. we need to check next matches only if those are shorter
845 | int ll=(used-pos>=hugelen)?1:0;
846 | for(j=MINLZ+1+ll;j<=len;j++) {
847 | int tmp=pofs+price_lzlen(used-pos,j-MINLZ+2,used);
848 | tmp+=cache(used+j);
849 | if (tmp=0) {
863 | sorted_next(sorted_prev(used))=sorted_next(used);
864 | }
865 | if (sorted_next(used)>=0) {
866 | sorted_len(sorted_next(used)) = Min(sorted_len(sorted_next(used)),
867 | sorted_len(used));
868 | sorted_prev(sorted_next(used))=sorted_prev(used);
869 | }
870 |
871 | best_ofs(used)=my_best_ofs;
872 | best_len(used)=my_best_len;
873 | use_olz(used)=my_use_olz;
874 | olz_len(used)=my_olz_len;
875 | use_olz2(used)=my_use_olz2;
876 | olz_len2(used)=my_olz_len2;
877 | cache(used)=res;
878 |
879 | if (verbose && (i&0xFFF)==0) {
880 | printf("\x0D%d left ",i-start);
881 | fflush(stdout);
882 | }
883 | }
884 |
885 | res=8+cache(start);
886 | if (verbose) printf("\nres=%d\n",res);
887 | res+=7;
888 | res>>=3;
889 | if (verbose) printf("res bytes=%d\n",res);
890 | if (res>=n-start) {
891 | return n;
892 | };
893 |
894 | /* now we can easily generate compressed stream */
895 | initout(start);
896 | for(i=start;i0) {
911 | for(;k>0;k--) put_letter(in_buf[i++]);
912 | if ((use_olz(i))&&(len==best_len(i))&&(ofs==best_ofs(i))) goto dolz;
913 | // printf("put_lz %d:%d,left=%d\n",ofs,len,n-i);
914 | put_lz(ofs,len,i);
915 | i+=len;
916 | if (k2>0) {
917 | for(;k2>0;k2--) put_letter(in_buf[i++]);
918 | if ((use_olz(i))&&(len2==best_len(i))&&(ofs==best_ofs(i))) goto dolz;
919 | // printf("put_lz %d:%d,left=%d\n",ofs,len,n-i);
920 | put_lz(ofs,len2,i);
921 | i+=len2;
922 |
923 | }
924 |
925 | }
926 | }
927 | }
928 | if (verbose) printf("out bytes=%d\n",outpos);
929 | return outpos;
930 | }
931 |
932 | int main(int argc,char *argv[]) {
933 | FILE *ifd,*ofd;
934 | int n,i,bres,blz;
935 | uint8_t b;
936 |
937 | if (argc<3) {
938 | // note: -d0 (32k history) does not work right now
939 | printf("usage: lzoma [OPTION] input output [lzlit lit olz len dist]\n"
940 | "\t-1 .. -9 Compression level (default 7)\n"
941 | "\t-d[1..15] History size (default 9: 16M history; compression currently requires about 30x*history RAM)\n"
942 | "\t-v Be verbose\n"
943 | );
944 | printf("Notice: this program is at experimental stage of development. Compression format is not stable yet.\n");
945 | if (argc>1 && argv[1][0]=='%') { // undocumented debug feature to check correctness of offset encoding, when tuning parameters in lzoma.h
946 | int i;
947 | int total=atoi(argv[1]+1);//16*1024*1024;
948 | printf("%d\n",total);
949 | for(i=total-10;i='1' && argv[arg][1]<='9')
962 | metalevel = argv[arg][1]-'0';
963 | if (argv[arg][1]=='v')
964 | verbose = 1;
965 | if (argv[arg][1]=='d') {
966 | dict_size = atoi(argv[arg]+2);
967 | if (dict_size <1) dict_size=1;
968 | if (dict_size >15) dict_size=15;
969 | }
970 | arg++;
971 | }
972 | history_size=HISTORY_SIZE(dict_size);
973 | block_size=BLOCK_SIZE(dict_size);
974 | metalevel--;
975 | level=levels[metalevel][0];
976 | short_match_level=levels[metalevel][1];
977 | match_level=levels[metalevel][2];
978 | in_buf = (void *)malloc(history_size * sizeof(uint8_t)+1);
979 | rle = (void *)malloc(history_size * sizeof(uint32_t));
980 | state = (void *)malloc(Max(block_size * sizeof(FutureState), history_size * sizeof(uint32_t)));
981 | past_state = (void *)malloc(history_size * sizeof(PastState));
982 | char *inf=argv[arg++];
983 | char *ouf=argv[arg++];
984 | ifd=fopen(inf,"rb");
985 | ofd=fopen(ouf,"wb");
986 | if (arghistory_size-block_size) {
1002 | memmove(in_buf, in_buf+block_size, history_size-block_size);
1003 | in_offset -= block_size;
1004 | }
1005 | n=fread(in_buf+in_offset,1,block_size,ifd);
1006 | if (n<=0) {
1007 | blk = BLOCK_STORED | BLOCK_LAST;
1008 | fwrite(&blk,4,1,ofd);
1009 | break;
1010 | }
1011 | if (verbose) printf("got %d bytes, packing...\n",n);
1012 | if (blocknum==0) {
1013 | /*
1014 | int b1=cnt_bpes(in_buf,n);
1015 | int use_e8=1;
1016 | e8(in_buf, n);
1017 | int b2=cnt_bpes(in_buf,n);
1018 | printf("stats noe8 %d e8 %d\n",b1,b2);
1019 | if (b2<=b1) {
1020 | use_e8=0;
1021 | printf("reverted e8\n");
1022 |
1023 | e8back(in_buf,n);
1024 | }
1025 | */
1026 | /*
1027 | write compressed file header
1028 | we do it here only after we read some data
1029 | TODO:
1030 | at this stage we should decide if we will use any file-level compression filters
1031 | */
1032 | uint8_t header[8];
1033 | header[0] = AuthorID >> 8;
1034 | header[1] = AuthorID & 0xFF;
1035 | header[2] = AlgoID[0];
1036 | header[3] = AlgoID[1];
1037 | header[4] = AlgoID[2];
1038 | header[5] = AlgoID[3];
1039 | header[6] = Version;
1040 | int flags=0;
1041 | header[7] = flags << 4 | dict_size;
1042 | fwrite(header,8,1,ofd);
1043 |
1044 | bres=pack(1,n);
1045 | } else { // next blocks
1046 | bres=pack(in_offset,in_offset+n);
1047 | }
1048 | uint32_t blk = (n < block_size) ? BLOCK_LAST : 0;
1049 | if (bres==n) {
1050 | blk |= BLOCK_STORED;
1051 | blk |= n;
1052 | fwrite(&blk,4,1,ofd);
1053 | fwrite(in_buf+in_offset,1,n,ofd);
1054 | } else {
1055 | blk |= bres;
1056 | fwrite(&blk,4,1,ofd);
1057 | if (blk & BLOCK_LAST)
1058 | fwrite(&n,4,1,ofd);
1059 | fwrite(out_buf,1,bres,ofd);
1060 | if (blk & BLOCK_LAST)
1061 | break;
1062 | }
1063 |
1064 | in_offset += n;
1065 | }
1066 | if (verbose) printf("closing files let=%d lz=%d olz=%d\n",stlet,stlz,stolz);
1067 | if (verbose) printf("bits lzlit=%d let=%d olz=%d match=%d len=%d\n",bitslzlen,bitslit,bitsolzlen,bitsdist,bitslen);
1068 | fclose(ifd);
1069 | fclose(ofd);
1070 |
1071 | #ifdef EXPERIMENTS
1072 | fclose(test);
1073 | fclose(test2);
1074 | fclose(test3);
1075 | #endif
1076 |
1077 | return 0;
1078 | }
1079 |
--------------------------------------------------------------------------------
/readme.MSVC:
--------------------------------------------------------------------------------
1 | To compile with MSVC:
2 |
3 | Open Developer Command Prompt and type:
4 | cl pack.c divsufsort.c
5 | cl unpack.c
6 |
7 | Note: MSVC support is currently untested, probably has bugs. Better use gcc or mingw if possible.
8 |
9 |
10 |
--------------------------------------------------------------------------------
/readme.txt:
--------------------------------------------------------------------------------
1 | Experimental packer based on a new compression algorithm (LZOMA).
2 | (C)2015-2016 Alexandr Efimov
3 |
4 | This code can be redistributed on GPL Version 2 License.
5 | For commercial licenses or support please contact author.
6 |
7 | Project goals:
8 | extremely fast in-place decompression (similar to LZO)
9 | but with high compression ratio (much better than LZO, GZIP, BZIP2)
10 |
11 | Current results:
12 |
13 | Compression ratio is much higher than gzip. And much much higher than LZO.
14 | Decompression speed is similar to UCL (a bit slower than LZO, faster than
15 | gzip, bzip2 etc).
16 | Decompressor code length is less than 300 bytes.
17 | Has special filter for x86 code.
18 | Decompression can be done in-place and does not require additional memory.
19 |
20 | Overall, the results are very good for "compress once, unpack often" tasks like
21 | linux kernel & ramdisk, readonly compressed filesystems.
22 |
23 | Comparison with other compression software:
24 | Nearest competitors are zstd, brotli.
25 | Other compressors/archivers either decompress much more slowly or have a much worse compression ratio.
26 |
27 | Compression ratio on binary files (without effect of e8e8 filter),
28 | from best to worst:
29 | brotli, lzoma, zstd
30 |
31 | Compression ratio on text files:
32 | brotli, zstd, lzoma
33 |
34 | Decompressor code size:
35 | lzoma, zstd, brotli
36 |
37 | Decompression speed, on x86-64:
38 | zstd is about 2x faster, lzoma and brotli have similar speed.
39 |
40 | Decompression speed, on Intel Atom tablet:
41 | zstd and lzoma have similar speed, brotli is 4x slower.
42 |
43 | Algorithm description.
44 |
45 | Compressed format has some features similar to both LZO and LZMA.
46 | Does not use range coding.
47 | Special bit added to matches that follow literals, indicating to re-use
48 | previous offset instead of always storing the offset for each match.
49 | This allows to more efficiently compress patterns like abcdEabcdFabcdGabc, as
50 | offset will be stored only for first match.
51 |
52 | This idea allows much higher compression than classical LZ algorithms but
53 | compressor is much more complicated.
54 |
55 | Compressed data format:
56 | literal, item, ... item
57 |
58 | Where:
59 | literal is uncompressed byte aligned at byte boundary
60 | item is:
61 | 1 bit flag (literal | match)
62 | if flag is literal then literal follows
63 |
64 | if flag is match then
65 | if previous item was literal
66 | 1 bit flag (0: use previous offset for match, 1: a new offset follows)
67 | if the previous offset is not used for the match
68 | offset (encoded)
69 | len (encoded)
70 |
71 | Notes:
72 |
73 | Algorithm is still experimental, compressed format is not final yet.
74 |
75 | File format (WIP, not implemented yet):
76 | 1. Header
77 | uint8_t[2] AuthorID 0xA1, 0xEF // this goes before AlgoID to avoid possible signature conflict with other LZ compressors
78 | uint8_t[4] AlgoID 'L','Z','O','M'
79 | uint8_t Version 0x00
80 | uint8_t HistorySize (low 4 bits) || Flags
81 | where HistorySize is
82 | 0: 32k
83 | 1: 64k
84 | 2: 128k
85 | 3: 256k
86 | 4: 512k
87 | 5: 1M
88 | 6: 2M
89 | 7: 4M
90 | 8: 8M
91 | 9: 16M
92 | 10:32M
93 | 11:64M
94 | 12:128M
95 | 13:256M
96 | 14:512M
97 | 15: 1G
98 | BlockSize = HistorySize / 16
99 |
100 | Flags:
101 | 0x10 - use filters, 1 byte filter type follows
102 | 0x00 - x86
103 | 0x01 - x86-64
104 | 0x02 - arm
105 | 0x03 - mips
106 | 0x04 - 0xF - reserved
107 | 0x10 - use delta filter
108 | 0x20 - text/xml filter
109 | 0x40 - reserved
110 | 0x80 - reserved
111 | 0x20 - encrypted file
112 | TODO: some compression header follows
113 | 0x40 - digitally signed file (signature follows at the end of file)
114 | 0x80 - reserved
115 |
116 | 2. Blocks
117 | Blocks header is 4 bytes or more:
118 | high bits masks:
119 | 0x80000000 - if set, it is a stored block
120 | 0x40000000 - last block, 4 byte unpacked length follows unless it is a stored block
121 | if not set, unpacked length assumed to be BLOCK_SIZE
122 | 0x20000000 - reserved
123 | 0x10000000 - reserved
124 | low 28 bits = packed length up to 2^28, can be zero
125 |
126 | 3. uint32_t CRC
127 |
128 |
--------------------------------------------------------------------------------
/unpack.c:
--------------------------------------------------------------------------------
1 | // test file decompression using the LZOMA algorithm
2 | // (c) Alexandr Efimov, 2015-2016
3 | // License: GPL v2 or later
4 |
5 | #include
6 | #include
7 | #include
8 | #ifndef _MSC_VER
9 | #include
10 | #endif
11 | #include
12 | #include
13 | //#include
14 |
15 | #ifndef O_BINARY
16 | #ifdef _O_BINARY
17 | #define O_BINARY _O_BINARY
18 | #else
19 | #define O_BINARY 0
20 | #endif
21 | #endif
22 |
23 | #include "lzoma.h"
24 |
25 | uint8_t *in_buf; /* text to be decoded */
26 | uint8_t *out_buf;/* decoded text + history */
27 |
28 | //#define getbit (((bits=bits&0x7f? bits+bits : (((unsigned)(*src++))<<1)+1)>>8)&1)
29 | #define getbit ((bits=bits&0x7fffffff? (resbits=bits,bits+bits) : (src+=4,resbits=*((uint32_t *)(src-4)),(resbits<<1)+1)),resbits>>31)
30 |
31 | #define getcode(bits, src, ptotal) {\
32 | int total = (ptotal);\
33 | ofs=0;\
34 | long int res=0;\
35 | int x=256;\
36 | int top=0;\
37 | top=lzlow(total);\
38 | res=*src++;\
39 | \
40 | while (1) {\
41 | x+=x;\
42 | if (x>=total+top) break;\
43 | if (x & lzmagic)\
44 | top=lzshift(top);\
45 | if (res=x) { \
53 | res+=res+getbit;\
54 | res-=x;\
55 | }\
56 | getcode_doneit: \
57 | ofs+=res;\
58 | }
59 |
60 | #define getlen(bits, src) { /* decode a length code and add it to the caller's variable len; defines a label, so it can be expanded only once per function */ \
61 | long int res=0;\
62 | \
63 | if (getbit==0) { /* first bit 0: short code, one more bit adds 0 or 1 */ \
64 | len+=getbit;\
65 | goto getlen_0bit;\
66 | }\
67 | len+=2;\
68 | while (1) { \
69 | res+=res+getbit; /* shift in one data bit */ \
70 | if (getbit==0) break; /* continuation bit 0: value complete */ \
71 | res++;\
72 | }\
73 | len+=res;\
74 | getlen_0bit: ;\
75 | }
76 |
77 | static void unpack_c(int current_history_size, int history_size, uint8_t *src, uint8_t *dst, uint8_t *start, int left) { // decode one packed block: src = packed data, dst = output position inside the history buffer that begins at start, left = number of bytes to produce
78 | int ofs=-1;
79 | int len;
80 | uint32_t bits=0x80000000; // bit reservoir used by getbit; this value makes the first getbit load a 32-bit word from src
81 | uint32_t resbits;
82 | left--;
83 | history_size--;// becomes mask for circular buffer indexing
84 | if (current_history_size) { // history already present: the block does not start with a raw literal
85 | current_history_size-=dst-start; // so that dst-start+current_history_size (the range given to getcode) starts at the passed-in value
86 | goto nextblock;
87 | }
88 |
89 | copyletter: // copy one literal byte from the packed stream
90 | *dst++=*src++;
91 | left--;
92 | nextblock:
93 | len=-1; // len<0 marks "previous item was a literal"
94 |
95 | get_bit:
96 | if (left<0) return;
97 | if (getbit==0) goto copyletter; // flag 0 = literal, 1 = match
98 |
99 | /* unpack lz */
100 | if (len<0) { // only right after a literal an extra flag bit follows
101 | len=1;
102 | if (!getbit) { // 0 = reuse the offset of the previous match
103 | goto uselastofs;
104 | }
105 | }
106 | len=2;
107 | getcode(bits,src,dst-start+current_history_size);
108 | ofs++;
109 | if (ofs>=longlen) len++; // far offsets imply a longer minimum match
110 | if (ofs>=hugelen) len++;
111 | ofs=-ofs; // stored as a negative distance from dst
112 | uselastofs:
113 | getlen(bits,src);
114 | left-=len;
115 |
116 | int ptr = dst-start+ofs;
117 | do {
118 | *dst=start[ptr&(history_size)]; // source index wraps around the history buffer (history_size is a mask here)
119 | ptr++;
120 | dst++;
121 | } while(--len);
122 | goto get_bit; // len is 0 here, so the next match always carries an explicit offset
123 | }
124 |
125 | #ifdef ASM_X86
126 | extern unsigned int unpack_x86(uint8_t *src, uint8_t *dst, int left);
127 | #endif
128 |
129 | #include "e8.h"
130 | int main(int argc,char * argv[]) {
131 | int ifd,ofd;
132 | int n,n_unp;
133 | char shift;
134 |
135 | if (argc<3) {
136 | printf("usage: unpack input output\n Unpacks file packed using lzoma algoritm\n");
137 | printf("Notice: this program is at experimental stage of development. Compression format is not stable yet.\n");
138 | exit(0);
139 | }
140 |
141 | ifd=open(argv[1],O_RDONLY|O_BINARY);
142 | ofd=open(argv[2],O_WRONLY|O_TRUNC|O_CREAT|O_BINARY,511);
143 | int current_history = 0;
144 | int ofs = 0;
145 | int use_e8=0;
146 | uint8_t header[8];
147 | read(ifd,header,8);
148 | if (header[0] != (AuthorID >> 8) ||
149 | header[1] != (AuthorID & 0xFF) ||
150 | header[2] != AlgoID[0] ||
151 | header[3] != AlgoID[1] ||
152 | header[4] != AlgoID[2] ||
153 | header[5] != AlgoID[3] ||
154 | header[6] != Version) {
155 | fprintf(stderr, "Unsupported compressed data format\n");
156 | return 1;
157 | }
158 | int dict_size = header[7] & 0xF;
159 | int history_size = HISTORY_SIZE(dict_size);
160 | int block_size = BLOCK_SIZE(dict_size);
161 | in_buf = (uint8_t *)malloc(block_size);
162 | out_buf = (uint8_t *)malloc(history_size); // history is 16*block_size
163 |
164 | uint32_t blk;
165 | while(read(ifd,&blk,4)==4) {
166 | //if (use_e8) e8(out_buf,n_unp);
167 | n = blk & (block_size-1);
168 | if (blk & BLOCK_STORED) {
169 | n_unp = n;
170 | } else if (blk & BLOCK_LAST) {
171 | read(ifd,&n_unp,4);
172 | } else {
173 | n_unp = block_size;
174 | }
175 | /*
176 | if (n != n_unp && !current_history)
177 | read(ifd,&use_e8,1);
178 | else
179 | use_e8 = 0;
180 | */
181 | //long unsigned tsc = (long unsigned)__rdtsc();
182 | if (n == n_unp) {
183 | read(ifd,out_buf,n_unp);
184 | write(ofd,out_buf+ofs,n_unp);
185 | } else {
186 | read(ifd,in_buf,n);
187 | #ifdef ASM_X86
188 | #error Asm version not yet updated for recent format changes. Please use C version right now.
189 | unpack_x86(in_buf, out_buf, n_unp);
190 | #else
191 | unpack_c(current_history, history_size, in_buf, out_buf+ofs, out_buf, n_unp);
192 | #endif
193 | //tsc=(long unsigned)__rdtsc()-tsc;
194 | //printf("tsc=%lu\n",tsc);
195 | //if (use_e8) e8back(out_buf,n_unp);
196 | write(ofd,out_buf+ofs,n_unp);
197 | }
198 | if (blk & BLOCK_LAST)
199 | break;
200 | ofs+=n_unp;
201 | ofs &= (history_size-1);
202 | current_history += n_unp;
203 | if (current_history > history_size-block_size)
204 | current_history = history_size-block_size;
205 | }
206 |
207 | close(ifd);
208 | close(ofd);
209 | return 0;
210 | }
211 |
--------------------------------------------------------------------------------
/unpack_lzoma.S:
--------------------------------------------------------------------------------
1 | .file "unpack_lzoma.S"
2 | .section .text.unlikely,"ax",@progbits
3 | .text
4 | .align 16
5 | .globl unpack_x86
6 | .type unpack_x86, @function
7 | unpack_x86:
8 | pushl %ebp
9 | pushl %edi
10 | pushl %esi
11 | pushl %ebx
12 | movl 28(%esp), %edx # uncompressed bytes num
13 | movl 20(%esp), %esi # input buffer
14 | decl %edx
15 | movl 24(%esp), %edi # edi = output buffer
16 | pushl %edi # save output buffer start in stack
17 | xorl %ebp, %ebp
18 | incl %ebp # ebp = offset = -1
19 | movl $0x80000000,%eax
20 | .copyletter:
21 | movsb
22 | #movb (%esi), %cl # cl = *src
23 | #inc %esi # src++
24 | #movb %cl, (%edi) # *dst = cl
25 | #inc %edi # dst++
26 | orl $-1, %ebx # ebx = len = -1
27 | decl %edx # left--
28 | js .unpack_ret
29 | .checkleft:
30 | addl %eax,%eax
31 | jnz .nonextbit
32 | lodsl
33 | adcl %eax,%eax
34 | .nonextbit:
35 | .checkifletterorlz:
36 | jnc .copyletter
37 | .unpack_lz:
38 | incl %ebx
39 | push %edx # save left
40 | jnz .load_ofs
41 | addl %eax,%eax
42 | jnz .L9
43 | lodsl
44 | adcl %eax,%eax
45 | .L9:
46 | # ebx==0 at this point
47 | jnc .load_len
48 | .load_ofs:
49 | push %edi # save dst, we need edi as temp register
50 | xor %ebp,%ebp # ofs=0
51 | movzxb (%esi), %ecx # res=*src
52 | mov $512, %ebx # x=256
53 | subl 8(%esp), %edi # here (%esp) is pushed edi, 4(%esp) is left
54 | inc %ebp # ofs++
55 | inc %esi
56 | #movl $48, %edx
57 | lea 47(%ebp),%edx
58 | cmp $652630,%edi
59 | ja .low
60 | mov $60,%dl
61 | cmp $49549,%edi
62 | ja .low
63 | mov $80,%dl
64 | .low:
65 | .loop_ofs:
66 | addl %edi,%edx # top+=total
67 | cmpl %edx,%ebx # cmp total+top,x
68 | jns .ofs_last_bit
69 | subl %edi,%edx # top-=total
70 | test $0x055ffc00,%ebx
71 | jz .noshift
72 | leal (%edx,%edx,8),%edx # top*=9
73 | shrl $3,%edx # top>>=3
74 | .noshift:
75 | cmp %edx,%ecx # cmp top,res
76 | jl .ofs_final_calc # if res
5 | #include
6 |
7 | uint BytesLoaded;
8 |
// Returns the size in bytes of the open file f.
// The stream is left positioned at the beginning of the file.
uint flen( FILE* f )
{
  fseek( f, 0, SEEK_END );
  const uint size = ftell( f );  // position at the end == file size
  fseek( f, 0, SEEK_SET );
  return size;
}
16 |
17 | void* fload( char* fname )
18 | {
19 | FILE* temp = fopen(fname,"rb");
20 | if (temp==0) return 0;
21 | unsigned int len = flen(temp);
22 | BytesLoaded = len;
23 | char* buf = new char[len];
24 | fread( buf, len, 1, temp );
25 | fclose( temp );
26 | return buf;
27 | }
28 |
// Writes len bytes from buf to the file fname, replacing any existing file.
// Failures (file cannot be created, short write) are reported on stderr;
// the function returns normally in every case.
void fsave( void* buf, unsigned int len, char* fname )
{
  FILE* temp = fopen(fname,"wb");
  if (temp==0) {
    // previously the null stream was passed on to fwrite/fclose
    perror(fname);
    return;
  }
  if ( len>0 && fwrite( buf, len, 1, temp )!=1 ) perror(fname);
  if ( fclose( temp )!=0 ) perror(fname);
}
35 |
// Reads a 32-bit little-endian value from file.
// The four bytes are read in separate statements: as operands of one `+`
// expression the order of the fgetc calls was unspecified. Each byte is
// masked to 8 bits and combined as unsigned, so a high top byte no longer
// overflows a signed shift; a byte missing at end of file reads as 0xFF.
uint fgetd( FILE* file)
{
  uint value = 0;
  for( int shift=0; shift<32; shift+=8 ) {
    const uint byte = (uint)fgetc(file) & 0xFFu;
    value |= byte << shift;
  }
  return value;
}
40 |
// Reads a 16-bit little-endian value from file.
// The two bytes are read in separate statements: as operands of one `+`
// expression the order of the fgetc calls was unspecified.
// A byte missing at end of file reads as 0xFF.
uint fgetw( FILE* file)
{
  const uint low  = (uint)fgetc(file) & 0xFFu;
  const uint high = (uint)fgetc(file) & 0xFFu;
  return low | (high<<8);
}
45 |
// Writes c to file as four bytes, least significant byte first.
void fputd( uint c, FILE* file )
{
  for( int shift=0; shift<32; shift+=8 ) {
    fputc( c>>shift, file );
  }
}
53 |
54 |
// Writes the low 16 bits of c to file as two bytes, least significant byte first.
void fputw( uint c, FILE* file )
{
  for( int shift=0; shift<16; shift+=8 ) {
    fputc( c>>shift, file );
  }
}
60 | #define Psh(c) ( c==0x06 || c==0x16 || c==0x1E || (c>0x4F && c<0x58) )
61 |
62 | #define wswap(a) ( ((a)>>8) + (((a)&255)<<8) )
63 | #define bswap(a) ( wswap((a)>>16)+(wswap((a)&65535)<<16) )
64 | //#define _bsw(a,i,h) (((uc(&)[4])(a))[i]<<(h))
65 | //#define bswap(a) ( _bsw(a,0,24)+_bsw(a,1,16)+_bsw(a,2,8)+_bsw(a,3,0) )
66 |
67 | #include