├── .gitignore ├── LICENSE ├── Makefile ├── ari ├── compbit.c ├── complen.c ├── complit.c ├── port.h ├── rangecod.c └── rangecod.h ├── bpe.h ├── divsufsort.c ├── divsufsort.h ├── e8.h ├── lzoma.h ├── pack.c ├── readme.MSVC ├── readme.txt ├── unpack.c ├── unpack_lzoma.S └── x86 ├── Makefile └── x86.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | # Object files 2 | *.o 3 | *.ko 4 | *.obj 5 | *.elf 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Libraries 12 | *.lib 13 | *.a 14 | *.la 15 | *.lo 16 | 17 | # Shared objects (inc. Windows DLLs) 18 | *.dll 19 | *.so 20 | *.so.* 21 | *.dylib 22 | 23 | # Executables 24 | *.exe 25 | *.out 26 | *.app 27 | *.i*86 28 | *.x86_64 29 | *.hex 30 | 31 | # Debug files 32 | *.dSYM/ 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. 
Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 
55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. 
You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. 
But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. 
(This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. 
These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 
214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. 
If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 
305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 
340 | 341 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | gcc -O2 -pipe pack.c divsufsort.c -o pack 3 | gcc -Os -fomit-frame-pointer -std=c99 -Os -pipe unpack.c -o unpack 4 | 5 | asm_x86: 6 | gcc -O2 -pipe pack.c divsufsort.c -o pack 7 | gcc -DASM_X86 -m32 -Os -fomit-frame-pointer -std=c99 -pipe unpack.c unpack_lzoma.S -o unpack 8 | 9 | test: 10 | ./pack pack.c pack.c.lzoma && ./unpack pack.c.lzoma pack.c.test && md5sum pack.c pack.c.test 11 | -------------------------------------------------------------------------------- /ari/compbit.c: -------------------------------------------------------------------------------- 1 | /* 2 | comp.c headerfile for quasistatic probability model 3 | 4 | (c) Michael Schindler 5 | 1997, 1998, 1999, 2000 6 | http://www.compressconsult.com/ 7 | michael@compressconsult.com 8 | 9 | This program is free software; you can redistribute it and/or modify 10 | it under the terms of the GNU General Public License as published by 11 | the Free Software Foundation; either version 2 of the License, or 12 | (at your option) any later version. 13 | 14 | This program is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. It may be that this 18 | program violates local patents in your country, however it is 19 | belived (NO WARRANTY!) to be patent-free here in Austria. 20 | 21 | You should have received a copy of the GNU General Public License 22 | along with this program; if not, write to the Free Software 23 | Foundation, Inc., 59 Temple Place - Suite 330, Boston, 24 | MA 02111-1307, USA. 25 | 26 | comp is an example compressor trying to compress files with a simple 27 | order 0 model. The files can be decompressed by decomp. 
28 | 29 | Note that I do not think that an order 0 model as here is good; 30 | For better compression see for example my freeware szip. 31 | http://www.compressconsult.com/szip/ 32 | or ask me as consultant what compression method fits your data best. 33 | */ 34 | 35 | #include 36 | #include 37 | #ifndef unix 38 | #include 39 | #include 40 | #endif 41 | #include 42 | #include 43 | #include "port.h" 44 | #include "rangecod.h" 45 | 46 | void usage() 47 | { fprintf(stderr,"comp [inputfile [outputfile]]\n"); 48 | fprintf(stderr,"comp (c)1997.1998 Michael Schindler, michael@compressconsult\n"); 49 | exit(1); 50 | } 51 | 52 | int main( int argc, char *argv[] ) 53 | { int ch, syfreq, ltfreq; 54 | rangecoder rc; 55 | 56 | if ((argc > 3) || ((argc>0) && (argv[1][0]=='-'))) 57 | usage(); 58 | 59 | if ( argc<1 ) 60 | fprintf( stderr, "stdin" ); 61 | else 62 | { freopen( argv[1], "rb", stdin ); 63 | fprintf( stderr, "%s", argv[1] ); 64 | } 65 | if ( argc<2 ) 66 | fprintf( stderr, " to stdout\n" ); 67 | else 68 | { freopen( argv[2], "wb", stdout ); 69 | fprintf( stderr, " to %s\n", argv[2] ); 70 | } 71 | fprintf( stderr, "%s\n", coderversion); 72 | 73 | #ifndef unix 74 | setmode( fileno( stdin ), O_BINARY ); 75 | setmode( fileno( stdout ), O_BINARY ); 76 | #endif 77 | 78 | start_encoding(&rc,0,0); 79 | int prop=32768; 80 | int prev=0; 81 | 82 | /* do the coding */ 83 | while ((ch=getc(stdin))!=EOF) 84 | { 85 | encbit(&rc,ch,&prop); 86 | } 87 | 88 | done_encoding(&rc); 89 | 90 | return 0; 91 | } 92 | -------------------------------------------------------------------------------- /ari/complen.c: -------------------------------------------------------------------------------- 1 | /* 2 | comp.c headerfile for quasistatic probability model 3 | 4 | (c) Michael Schindler 5 | 1997, 1998, 1999, 2000 6 | http://www.compressconsult.com/ 7 | michael@compressconsult.com 8 | 9 | This program is free software; you can redistribute it and/or modify 10 | it under the terms of the GNU General 
Public License as published by 11 | the Free Software Foundation; either version 2 of the License, or 12 | (at your option) any later version. 13 | 14 | This program is distributed in the hope that it will be useful, 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | GNU General Public License for more details. It may be that this 18 | program violates local patents in your country, however it is 19 | believed (NO WARRANTY!) to be patent-free here in Austria. 20 | 21 | You should have received a copy of the GNU General Public License 22 | along with this program; if not, write to the Free Software 23 | Foundation, Inc., 59 Temple Place - Suite 330, Boston, 24 | MA 02111-1307, USA. 25 | 26 | comp is an example compressor trying to compress files with a simple 27 | order 0 model. The files can be decompressed by decomp. 28 | 29 | Note that I do not think that an order 0 model as here is good; 30 | For better compression see for example my freeware szip. 31 | http://www.compressconsult.com/szip/ 32 | or ask me as consultant what compression method fits your data best. 
33 | */ 34 | 35 | #include 36 | #include 37 | #ifndef unix 38 | #include 39 | #include 40 | #endif 41 | #include 42 | #include 43 | #include "port.h" 44 | #include "rangecod.h" 45 | 46 | void usage() 47 | { fprintf(stderr,"comp [inputfile [outputfile]]\n"); 48 | fprintf(stderr,"comp (c)1997.1998 Michael Schindler, michael@compressconsult\n"); 49 | exit(1); 50 | } 51 | 52 | int main( int argc, char *argv[] ) 53 | { int ch1,ch2,ch3,ch4, syfreq, ltfreq; 54 | rangecoder rc; 55 | //qsmodel qsm[48]; 56 | int prop[48]; 57 | 58 | if ((argc > 3) || ((argc>0) && (argv[1][0]=='-'))) 59 | usage(); 60 | 61 | if ( argc<1 ) 62 | fprintf( stderr, "stdin" ); 63 | else 64 | { freopen( argv[1], "rb", stdin ); 65 | fprintf( stderr, "%s", argv[1] ); 66 | } 67 | if ( argc<2 ) 68 | fprintf( stderr, " to stdout\n" ); 69 | else 70 | { freopen( argv[2], "wb", stdout ); 71 | fprintf( stderr, " to %s\n", argv[2] ); 72 | } 73 | fprintf( stderr, "%s\n", coderversion); 74 | 75 | #ifndef unix 76 | setmode( fileno( stdin ), O_BINARY ); 77 | setmode( fileno( stdout ), O_BINARY ); 78 | #endif 79 | 80 | /* make an alphabet with 257 symbols, use 256 as end-of-file */ 81 | #define SMALL 25 82 | //#define SMALL 400 83 | int j; 84 | for(j=0;j<48;j++) prop[j]=32768; 85 | // initqsmodel(&qsm[j],2,12,200,NULL,1); 86 | 87 | start_encoding(&rc,0,0); 88 | 89 | /* do the coding */ 90 | while (1) 91 | { 92 | int len; 93 | len = 0; 94 | if ((ch1=getc(stdin))==EOF) break; 95 | if ((ch2=getc(stdin))==EOF) break; 96 | if ((ch3=getc(stdin))==EOF) break; 97 | if ((ch4=getc(stdin))==EOF) break; 98 | len = ch4; len<<=8; 99 | len += ch3; len<<=8; 100 | len += ch2; len<<=8; 101 | len += ch1; 102 | //fprintf(stderr,"%d\n",len); 103 | int i=0; 104 | for(;;) { 105 | encbit(&rc,len&1,prop+i);i++; 106 | // qsgetfreq(&qsm[i],len&1,&syfreq,<freq); 107 | // encode_shift(&rc,syfreq,ltfreq,12); 108 | // qsupdate(&qsm[i],len&1); 109 | len>>=1; 110 | // i++; 111 | if (len==0) { 112 | encbit(&rc,1,prop+i); 113 | // 
qsgetfreq(&qsm[i],1,&syfreq,<freq); 114 | // encode_shift(&rc,syfreq,ltfreq,12); 115 | // qsupdate(&qsm[i],1); 116 | break; 117 | } 118 | encbit(&rc,0,prop+i);i++; 119 | // qsgetfreq(&qsm[i],0,&syfreq,<freq); 120 | // encode_shift(&rc,syfreq,ltfreq,12); 121 | // qsupdate(&qsm[i],0); 122 | // i++; 123 | len--; 124 | } 125 | } 126 | /* write 256 as end-of-file */ 127 | // qsgetfreq(&qsm1,SMALL,&syfreq,<freq); 128 | // encode_shift(&rc,syfreq,ltfreq,12); 129 | 130 | done_encoding(&rc); 131 | 132 | return 0; 133 | } 134 | -------------------------------------------------------------------------------- /ari/complit.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #ifndef unix 4 | #include 5 | #include 6 | #endif 7 | #include 8 | #include 9 | #include "port.h" 10 | #include "rangecod.h" 11 | 12 | void usage() 13 | { fprintf(stderr,"comp [inputfile [outputfile]]\n"); 14 | fprintf(stderr,"comp (c)1997.1998 Michael Schindler, michael@compressconsult\n"); 15 | exit(1); 16 | } 17 | 18 | int main( int argc, char *argv[] ) 19 | { int ch1,ch2,ch3,ch4, syfreq, ltfreq; 20 | rangecoder rc; 21 | int prop[256]; 22 | 23 | if ((argc > 3) || ((argc>0) && (argv[1][0]=='-'))) 24 | usage(); 25 | 26 | if ( argc<1 ) 27 | fprintf( stderr, "stdin" ); 28 | else 29 | { freopen( argv[1], "rb", stdin ); 30 | fprintf( stderr, "%s", argv[1] ); 31 | } 32 | if ( argc<2 ) 33 | fprintf( stderr, " to stdout\n" ); 34 | else 35 | { freopen( argv[2], "wb", stdout ); 36 | fprintf( stderr, " to %s\n", argv[2] ); 37 | } 38 | fprintf( stderr, "%s\n", coderversion); 39 | 40 | #ifndef unix 41 | setmode( fileno( stdin ), O_BINARY ); 42 | setmode( fileno( stdout ), O_BINARY ); 43 | #endif 44 | 45 | int j; 46 | for(j=0;j<256;j++) prop[j]=32768; 47 | 48 | start_encoding(&rc,0,0); 49 | /* do the coding */ 50 | while (1) 51 | { 52 | unsigned char len; 53 | if ((ch1=getc(stdin))==EOF) break; 54 | len = ch1; 55 | //fprintf(stderr,"%d\n",len); 56 | int 
ctx=1; 57 | for(;ctx<256;) { 58 | encbit(&rc,len>>7,prop+ctx); 59 | ctx+=ctx+(len>>7); 60 | len+=len; 61 | } 62 | } 63 | done_encoding(&rc); 64 | 65 | return 0; 66 | } 67 | -------------------------------------------------------------------------------- /ari/port.h: -------------------------------------------------------------------------------- 1 | #ifndef port_h 2 | #define port_h 3 | #include 4 | 5 | #ifdef GCC 6 | #define Inline inline 7 | #else 8 | #define Inline __inline 9 | #endif 10 | 11 | #if INT_MAX > 0x7FFF 12 | typedef unsigned short uint2; /* two-byte integer (large arrays) */ 13 | typedef unsigned int uint4; /* four-byte integers (range needed) */ 14 | #else 15 | typedef unsigned int uint2; 16 | typedef unsigned long uint4; 17 | #endif 18 | 19 | typedef unsigned int uint; /* fast unsigned integer, 2 or 4 bytes */ 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /ari/rangecod.c: -------------------------------------------------------------------------------- 1 | #define NOWARN 2 | 3 | /* 4 | define EXTRAFAST for increased speed; you loose compression and 5 | compatibility in exchange. 6 | */ 7 | //#define EXTRAFAST 8 | 9 | #include 10 | #include "port.h" 11 | #include "rangecod.h" 12 | 13 | void encbit(rangecoder *rc,int bit, int *prop) 14 | { 15 | int prob=(*prop)>>3; 16 | 17 | int x=6; 18 | 19 | if (bit) { 20 | encode_shift(rc,8192-prob,prob,13); 21 | *prop-=(*prop)>>x; 22 | } else { 23 | encode_shift(rc,prob,0,13); 24 | *prop+=(65536-(*prop))>>x; 25 | } 26 | *prop&=~1; 27 | *prop|=bit; 28 | } 29 | /* SIZE OF RANGE ENCODING CODE VALUES. 
*/ 30 | 31 | #define CODE_BITS 32 32 | #define Top_value ((code_value)1 << (CODE_BITS-1)) 33 | 34 | 35 | /* all IO is done by these macros - change them if you want to */ 36 | /* no checking is done - do it here if you want it */ 37 | /* cod is a pointer to the used rangecoder */ 38 | #define outbyte(cod,x) putchar(x) 39 | #define inbyte(cod) getchar() 40 | 41 | 42 | #ifdef RENORM95 43 | #include "renorm95.c" 44 | 45 | #else 46 | #define SHIFT_BITS (CODE_BITS - 9) 47 | #define EXTRA_BITS ((CODE_BITS-2) % 8 + 1) 48 | #define Bottom_value (Top_value >> 8) 49 | 50 | #ifdef NOWARN 51 | #ifdef GLOBALRANGECODER 52 | char coderversion[]="rangecoder 1.3 NOWARN GLOBAL (c) 1997-2000 Michael Schindler"; 53 | #else 54 | char coderversion[]="rangecoder 1.3 NOWARN (c) 1997-2000 Michael Schindler"; 55 | #endif 56 | #else /*NOWARN*/ 57 | #ifdef GLOBALRANGECODER 58 | char coderversion[]="rangecoder 1.3 GLOBAL (c) 1997-2000 Michael Schindler"; 59 | #else 60 | char coderversion[]="rangecoder 1.3 (c) 1997-2000 Michael Schindler"; 61 | #endif 62 | #endif /*NOWARN*/ 63 | #endif /*RENORM95*/ 64 | 65 | 66 | #ifdef GLOBALRANGECODER 67 | /* if this is defined we'll make a global variable rngc and */ 68 | /* make RNGC use that var; we'll also omit unneeded parameters */ 69 | static rangecoder rngc; 70 | #define RNGC (rngc) 71 | #define M_outbyte(a) outbyte(&rngc,a) 72 | #define M_inbyte inbyte(&rngc) 73 | #define enc_normalize(rc) M_enc_normalize() 74 | #define dec_normalize(rc) M_dec_normalize() 75 | #else 76 | #define RNGC (*rc) 77 | #define M_outbyte(a) outbyte(rc,a) 78 | #define M_inbyte inbyte(rc) 79 | #endif 80 | 81 | 82 | /* rc is the range coder to be used */ 83 | /* c is written as first byte in the datastream */ 84 | /* one could do without c, but then you have an additional if */ 85 | /* per outputbyte. 
*/ 86 | void start_encoding( rangecoder *rc, char c, int initlength ) 87 | { RNGC.low = 0; /* Full code range */ 88 | RNGC.range = Top_value; 89 | RNGC.buffer = c; 90 | RNGC.help = 0; /* No bytes to follow */ 91 | RNGC.bytecount = initlength; 92 | } 93 | 94 | 95 | #ifndef RENORM95 96 | /* I do the normalization before I need a defined state instead of */ 97 | /* after messing it up. This simplifies starting and ending. */ 98 | static Inline void enc_normalize( rangecoder *rc ) 99 | { while(RNGC.range <= Bottom_value) /* do we need renormalisation? */ 100 | { if (RNGC.low < (code_value)0xff< output */ 101 | { M_outbyte(RNGC.buffer); 102 | for(; RNGC.help; RNGC.help--) 103 | M_outbyte(0xff); 104 | RNGC.buffer = (unsigned char)(RNGC.low >> SHIFT_BITS); 105 | } else if (RNGC.low & Top_value) /* carry now, no future carry */ 106 | { M_outbyte(RNGC.buffer+1); 107 | for(; RNGC.help; RNGC.help--) 108 | M_outbyte(0); 109 | RNGC.buffer = (unsigned char)(RNGC.low >> SHIFT_BITS); 110 | } else /* passes on a potential carry */ 111 | #ifdef NOWARN 112 | RNGC.help++; 113 | #else 114 | if (RNGC.bytestofollow++ == 0xffffffffL) 115 | { fprintf(stderr,"Too many bytes outstanding - File too large\n"); 116 | exit(1); 117 | } 118 | #endif 119 | RNGC.range <<= 8; 120 | RNGC.low = (RNGC.low<<8) & (Top_value-1); 121 | RNGC.bytecount++; 122 | } 123 | } 124 | #endif 125 | 126 | 127 | /* Encode a symbol using frequencies */ 128 | /* rc is the range coder to be used */ 129 | /* sy_f is the interval length (frequency of the symbol) */ 130 | /* lt_f is the lower end (frequency sum of < symbols) */ 131 | /* tot_f is the total interval length (total frequency sum) */ 132 | /* or (faster): tot_f = (code_value)1<> shift; 153 | tmp = r * lt_f; 154 | RNGC.low += tmp; 155 | #ifdef EXTRAFAST 156 | RNGC.range = r * sy_f; 157 | #else 158 | if ((lt_f+sy_f) >> shift) 159 | RNGC.range -= tmp; 160 | else 161 | RNGC.range = r * sy_f; 162 | #endif 163 | } 164 | 165 | 166 | #ifndef RENORM95 167 | /* Finish 
encoding */ 168 | /* rc is the range coder to be used */ 169 | /* actually not that many bytes need to be output, but who */ 170 | /* cares. I output them because decode will read them :) */ 171 | /* the return value is the number of bytes written */ 172 | uint4 done_encoding( rangecoder *rc ) 173 | { uint tmp; 174 | enc_normalize(rc); /* now we have a normalized state */ 175 | RNGC.bytecount += 5; 176 | if ((RNGC.low & (Bottom_value-1)) < ((RNGC.bytecount&0xffffffL)>>1)) 177 | tmp = RNGC.low >> SHIFT_BITS; 178 | else 179 | tmp = (RNGC.low >> SHIFT_BITS) + 1; 180 | if (tmp > 0xff) /* we have a carry */ 181 | { M_outbyte(RNGC.buffer+1); 182 | for(; RNGC.help; RNGC.help--) 183 | M_outbyte(0); 184 | } else /* no carry */ 185 | { M_outbyte(RNGC.buffer); 186 | for(; RNGC.help; RNGC.help--) 187 | M_outbyte(0xff); 188 | } 189 | M_outbyte(tmp & 0xff); 190 | M_outbyte((RNGC.bytecount>>16) & 0xff); 191 | M_outbyte((RNGC.bytecount>>8) & 0xff); 192 | M_outbyte(RNGC.bytecount & 0xff); 193 | return RNGC.bytecount; 194 | } 195 | 196 | 197 | /* Start the decoder */ 198 | /* rc is the range coder to be used */ 199 | /* returns the char from start_encoding or EOF */ 200 | int start_decoding( rangecoder *rc ) 201 | { int c = M_inbyte; 202 | if (c==EOF) 203 | return EOF; 204 | RNGC.buffer = M_inbyte; 205 | RNGC.low = RNGC.buffer >> (8-EXTRA_BITS); 206 | RNGC.range = (code_value)1 << EXTRA_BITS; 207 | return c; 208 | } 209 | 210 | 211 | static Inline void dec_normalize( rangecoder *rc ) 212 | { while (RNGC.range <= Bottom_value) 213 | { RNGC.low = (RNGC.low<<8) | ((RNGC.buffer<> (8-EXTRA_BITS); 216 | RNGC.range <<= 8; 217 | } 218 | } 219 | #endif 220 | 221 | 222 | /* Calculate culmulative frequency for next symbol. Does NO update!*/ 223 | /* rc is the range coder to be used */ 224 | /* tot_f is the total frequency */ 225 | /* or: totf is (code_value)1<=tot_f ? 
tot_f-1 : tmp); 236 | #endif 237 | } 238 | 239 | freq decode_culshift( rangecoder *rc, freq shift ) 240 | { freq tmp; 241 | dec_normalize(rc); 242 | RNGC.help = RNGC.range>>shift; 243 | tmp = RNGC.low/RNGC.help; 244 | #ifdef EXTRAFAST 245 | return tmp; 246 | #else 247 | return (tmp>>shift ? ((code_value)1< 69 | #if INT_MAX > 0xffff 70 | typedef unsigned int uint4; 71 | typedef unsigned short uint2; 72 | #else 73 | typedef unsigned long uint4; 74 | typedef unsigned int uint2; 75 | #endif 76 | #endif 77 | 78 | extern char coderversion[]; 79 | 80 | typedef uint4 code_value; /* Type of an rangecode value */ 81 | /* must accomodate 32 bits */ 82 | /* it is highly recommended that the total frequency count is less */ 83 | /* than 1 << 19 to minimize rounding effects. */ 84 | /* the total frequency count MUST be less than 1<<23 */ 85 | 86 | typedef uint4 freq; 87 | 88 | /* make the following private in the arithcoder object in C++ */ 89 | 90 | typedef struct { 91 | uint4 low, /* low end of interval */ 92 | range, /* length of interval */ 93 | help; /* bytes_to_follow resp. intermediate value */ 94 | unsigned char buffer;/* buffer for input/output */ 95 | /* the following is used only when encoding */ 96 | uint4 bytecount; /* counter for outputed bytes */ 97 | /* insert fields you need for input/output below this line! 
*/ 98 | } rangecoder; 99 | 100 | 101 | void encbit(rangecoder *rc, int bit, int *prop); 102 | /* supply the following as methods of the arithcoder object */ 103 | /* omit the first parameter then (C++) */ 104 | #ifdef GLOBALRANGECODER 105 | #define start_encoding(rc,a,b) M_start_encoding(a,b) 106 | #define encode_freq(rc,a,b,c) M_encode_freq(a,b,c) 107 | #define encode_shift(rc,a,b,c) M_encode_shift(a,b,c) 108 | #define done_encoding(rc) M_done_encoding() 109 | #define start_decoding(rc) M_start_decoding() 110 | #define decode_culfreq(rc,a) M_decode_culfreq(a) 111 | #define decode_culshift(rc,a) M_decode_culshift(a) 112 | #define decode_update(rc,a,b,c) M_decode_update(a,b,c) 113 | #define decode_byte(rc) M_decode_byte() 114 | #define decode_short(rc) M_decode_short() 115 | #define done_decoding(rc) M_done_decoding() 116 | #endif 117 | 118 | 119 | /* Start the encoder */ 120 | /* rc is the range coder to be used */ 121 | /* c is written as first byte in the datastream (header,...) */ 122 | void start_encoding( rangecoder *rc, char c, int initlength); 123 | 124 | 125 | /* Encode a symbol using frequencies */ 126 | /* rc is the range coder to be used */ 127 | /* sy_f is the interval length (frequency of the symbol) */ 128 | /* lt_f is the lower end (frequency sum of < symbols) */ 129 | /* tot_f is the total interval length (total frequency sum) */ 130 | /* or (a lot faster): tot_f = 1< 2 | #include 3 | 4 | uint8_t bpe_flags[8192]; 5 | 6 | static inline void set_bpe(uint8_t a,uint8_t b) 7 | { 8 | int ab=a; 9 | ab<<=5; 10 | ab+=b>>3; 11 | bpe_flags[ab]|=(1<<(b&7)); 12 | } 13 | 14 | static inline void unset_bpe(uint8_t a,uint8_t b) 15 | { 16 | int ab=a; 17 | ab<<=5; 18 | ab+=b>>3; 19 | bpe_flags[ab]&=~(1<<(b&7)); 20 | } 21 | 22 | static inline int has_bpe(uint8_t a,uint8_t b) 23 | { 24 | int ab=a; 25 | ab<<=5; 26 | ab+=b>>3; 27 | return bpe_flags[ab]&(1<<(b&7)); 28 | } 29 | 30 | #define BPE 1024 31 | int bpe_last_ofs[BPE]; 32 | int bpe_num; 33 | int bpe_head; 34 | 35 | 
void bpe_init() { 36 | bpe_num=0; 37 | bpe_head=0; 38 | memset(bpe_flags,0,8192); 39 | } 40 | 41 | void bpe_push(uint8_t *buf, int pos) 42 | { 43 | if (pos<2) return; 44 | uint8_t a=buf[pos-2]; 45 | uint8_t b=buf[pos-1]; 46 | if (has_bpe(a,b)) { 47 | return; 48 | } 49 | if (bpe_num==BPE) { 50 | int prev_pos=bpe_last_ofs[bpe_head]; 51 | uint8_t pa=buf[prev_pos]; 52 | uint8_t pb=buf[prev_pos+1]; 53 | unset_bpe(pa,pb); 54 | } 55 | bpe_last_ofs[bpe_head++]=pos-2; 56 | if (bpe_head==BPE) bpe_head=0; 57 | if (bpe_num 28 | #include 29 | #ifdef _OPENMP 30 | # include 31 | #endif 32 | #include "divsufsort.h" 33 | 34 | 35 | /*- Constants -*/ 36 | #define INLINE __inline 37 | #if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1) 38 | # undef ALPHABET_SIZE 39 | #endif 40 | #if !defined(ALPHABET_SIZE) 41 | # define ALPHABET_SIZE (256) 42 | #endif 43 | #define BUCKET_A_SIZE (ALPHABET_SIZE) 44 | #define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE) 45 | #if defined(SS_INSERTIONSORT_THRESHOLD) 46 | # if SS_INSERTIONSORT_THRESHOLD < 1 47 | # undef SS_INSERTIONSORT_THRESHOLD 48 | # define SS_INSERTIONSORT_THRESHOLD (1) 49 | # endif 50 | #else 51 | # define SS_INSERTIONSORT_THRESHOLD (8) 52 | #endif 53 | #if defined(SS_BLOCKSIZE) 54 | # if SS_BLOCKSIZE < 0 55 | # undef SS_BLOCKSIZE 56 | # define SS_BLOCKSIZE (0) 57 | # elif 32768 <= SS_BLOCKSIZE 58 | # undef SS_BLOCKSIZE 59 | # define SS_BLOCKSIZE (32767) 60 | # endif 61 | #else 62 | # define SS_BLOCKSIZE (1024) 63 | #endif 64 | /* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */ 65 | #if SS_BLOCKSIZE == 0 66 | # define SS_MISORT_STACKSIZE (96) 67 | #elif SS_BLOCKSIZE <= 4096 68 | # define SS_MISORT_STACKSIZE (16) 69 | #else 70 | # define SS_MISORT_STACKSIZE (24) 71 | #endif 72 | #define SS_SMERGE_STACKSIZE (32) 73 | #define TR_INSERTIONSORT_THRESHOLD (8) 74 | #define TR_STACKSIZE (64) 75 | 76 | 77 | /*- Macros -*/ 78 | #ifndef SWAP 79 | # define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0) 80 | #endif /* SWAP */ 81 | 
/* MIN/MAX evaluate each argument twice; do not pass expressions with
   side effects. */
#ifndef MIN
# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
#endif /* MIN */
#ifndef MAX
# define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
#endif /* MAX */
/* Explicit-stack helpers. They expand to accesses of a local array named
   `stack` (fields a, b, c, d and, for the *5 forms, e) and a local int
   `ssize` that must exist in the calling function. The push forms do no
   capacity check. The pop forms execute `return;` in the calling function
   when the stack is empty, so they are only usable in void functions. */
#define STACK_PUSH(_a, _b, _c, _d)\
  do {\
    stack[ssize].a = (_a), stack[ssize].b = (_b),\
    stack[ssize].c = (_c), stack[ssize++].d = (_d);\
  } while(0)
#define STACK_PUSH5(_a, _b, _c, _d, _e)\
  do {\
    stack[ssize].a = (_a), stack[ssize].b = (_b),\
    stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
  } while(0)
#define STACK_POP(_a, _b, _c, _d)\
  do {\
    if(ssize == 0) { return; }\
    (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
    (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
  } while(0)
#define STACK_POP5(_a, _b, _c, _d, _e)\
  do {\
    if(ssize == 0) { return; }\
    (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
    (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
  } while(0)
/* Bucket accessors; they index arrays named bucket_A / bucket_B that must
   be visible where the macro is used. BUCKET_B and BUCKET_BSTAR address the
   same array with the two character indices in opposite order. */
#define BUCKET_A(_c0) bucket_A[(_c0)]
#if ALPHABET_SIZE == 256
#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
#else
#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
#endif


/*- Private Functions -*/

/* lg_table[i] is floor(log2(i)) for i in 1..255; entry 0 holds -1. */
static const int lg_table[256]= {
 -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
};

#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)

/* Integer log2 of n via lg_table, one byte at a time.
   The variant compiled depends on SS_BLOCKSIZE:
     == 0   : all four bytes of n are considered;
     < 256  : n indexes lg_table directly (n must be in 0..255);
     else   : only the low 16 bits of n are considered.
   For n == 0 the table yields -1. */
static INLINE
int
ss_ilg(int n) {
#if SS_BLOCKSIZE == 0
  return (n & 0xffff0000) ?
          ((n & 0xff000000) ?
            24 + lg_table[(n >> 24) & 0xff] :
            16 + lg_table[(n >> 16) & 0xff]) :
          ((n & 0x0000ff00) ?
             8 + lg_table[(n >> 8) & 0xff] :
             0 + lg_table[(n >> 0) & 0xff]);
#elif SS_BLOCKSIZE < 256
  return lg_table[n];
#else
  return (n & 0xff00) ?
          8 + lg_table[(n >> 8) & 0xff] :
          0 + lg_table[(n >> 0) & 0xff];
#endif
}

#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */

#if SS_BLOCKSIZE != 0

/* Seed table for ss_isqrt. The entries appear to be floor(16 * sqrt(i))
   (e.g. [1] = 16, [4] = 32, [64] = 128) - NOTE(review): not re-derived
   for every entry. */
static const int sqq_table[256] = {
  0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61,
 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89,
 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109,
110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
};

/* Integer square root of x, capped at SS_BLOCKSIZE.
   Returns SS_BLOCKSIZE as soon as x >= SS_BLOCKSIZE * SS_BLOCKSIZE.
   Otherwise e is the bit length index of x (same byte-wise log2 as above),
   a first estimate is read from sqq_table and, for e >= 16, refined with
   one or two averaging steps y = (y + 1 + x / y) >> 1. The final test
   steps the estimate down by one when y * y exceeds x. For e < 8 the
   result is taken straight from the table (sqq_table[x] >> 4). */
static INLINE
int
ss_isqrt(int x) {
  int y, e;

  if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
  e = (x & 0xffff0000) ?
        ((x & 0xff000000) ?
          24 + lg_table[(x >> 24) & 0xff] :
          16 + lg_table[(x >> 16) & 0xff]) :
        ((x & 0x0000ff00) ?
           8 + lg_table[(x >> 8) & 0xff] :
           0 + lg_table[(x >> 0) & 0xff]);

  if(e >= 16) {
    y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
    if(e >= 24) { y = (y + 1 + x / y) >> 1; }
    y = (y + 1 + x / y) >> 1;
  } else if(e >= 8) {
    y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
  } else {
    return sqq_table[x] >> 4;
  }

  return (x < (y * y)) ? y - 1 : y;
}

#endif /* SS_BLOCKSIZE != 0 */


/*---------------------------------------------------------------------------*/

/* Compares two suffixes. */
/* The byte ranges compared are [T + depth + *p1, T + *(p1 + 1) + 2) and
   [T + depth + *p2, T + *(p2 + 1) + 2); note that p1[1] and p2[1] are read
   to obtain the end of each range.
   Returns the difference of the first differing bytes, 1 if only the
   second range ran out, -1 if only the first ran out, and 0 if both ran
   out together. */
static INLINE
int
ss_compare(const unsigned char *T,
           const int *p1, const int *p2,
           int depth) {
  const unsigned char *U1, *U2, *U1n, *U2n;

  for(U1 = T + depth + *p1,
      U2 = T + depth + *p2,
      U1n = T + *(p1 + 1) + 2,
      U2n = T + *(p2 + 1) + 2;
      (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
      ++U1, ++U2) {
  }

  return U1 < U1n ?
        (U2 < U2n ? *U1 - *U2 : 1) :
        (U2 < U2n ? -1 : 0);
}


/*---------------------------------------------------------------------------*/

#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)

/* Insertionsort for small size groups */
/* Sorts [first, last) in place, ordering entries with ss_compare on
   PA + entry. Elements are inserted from right to left. A negative entry
   is moved together with the entry before it (the do/while skips over
   negative values). When the inserted element compares equal to its
   right neighbour, that neighbour is stored bitwise complemented. */
static
void
ss_insertionsort(const unsigned char *T, const int *PA,
                 int *first, int *last, int depth) {
  int *i, *j;
  int t;
  int r;

  for(i = last - 2; first <= i; --i) {
    for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
      /* shift the smaller entry (and any complemented followers) left */
      do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
      if(last <= j) { break; }
    }
    if(r == 0) { *j = ~*j; }
    *(j - 1) = t;
  }
}

#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */


/*---------------------------------------------------------------------------*/

#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)

/* Sift-down for a max-heap stored in SA[0..size), keyed by
   Td[PA[SA[k]]]. The element at index i is moved down until neither child
   has a larger key. The second child SA[j] (j = 2 * i + 2) is read
   without a separate bound check, so callers must make sure that index is
   readable. */
static INLINE
void
ss_fixdown(const unsigned char *Td, const int *PA,
           int *SA, int i, int size) {
  int j, k;
  int v;
  int c, d, e;

  for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
    d = Td[PA[SA[k = j++]]];
    if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
    if(d <= c) { break; }
  }
  SA[i] = v;
}

/* Simple top-down heapsort.
*/ 278 | static 279 | void 280 | ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) { 281 | int i, m; 282 | int t; 283 | 284 | m = size; 285 | if((size % 2) == 0) { 286 | m--; 287 | if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); } 288 | } 289 | 290 | for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); } 291 | if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); } 292 | for(i = m - 1; 0 < i; --i) { 293 | t = SA[0], SA[0] = SA[i]; 294 | ss_fixdown(Td, PA, SA, 0, i); 295 | SA[i] = t; 296 | } 297 | } 298 | 299 | 300 | /*---------------------------------------------------------------------------*/ 301 | 302 | /* Returns the median of three elements. */ 303 | static INLINE 304 | int * 305 | ss_median3(const unsigned char *Td, const int *PA, 306 | int *v1, int *v2, int *v3) { 307 | int *t; 308 | if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); } 309 | if(Td[PA[*v2]] > Td[PA[*v3]]) { 310 | if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; } 311 | else { return v3; } 312 | } 313 | return v2; 314 | } 315 | 316 | /* Returns the median of five elements. */ 317 | static INLINE 318 | int * 319 | ss_median5(const unsigned char *Td, const int *PA, 320 | int *v1, int *v2, int *v3, int *v4, int *v5) { 321 | int *t; 322 | if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); } 323 | if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); } 324 | if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); } 325 | if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); } 326 | if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); } 327 | if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; } 328 | return v3; 329 | } 330 | 331 | /* Returns the pivot element. 
*/ 332 | static INLINE 333 | int * 334 | ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) { 335 | int *middle; 336 | int t; 337 | 338 | t = last - first; 339 | middle = first + t / 2; 340 | 341 | if(t <= 512) { 342 | if(t <= 32) { 343 | return ss_median3(Td, PA, first, middle, last - 1); 344 | } else { 345 | t >>= 2; 346 | return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1); 347 | } 348 | } 349 | t >>= 3; 350 | first = ss_median3(Td, PA, first, first + t, first + (t << 1)); 351 | middle = ss_median3(Td, PA, middle - t, middle, middle + t); 352 | last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1); 353 | return ss_median3(Td, PA, first, middle, last); 354 | } 355 | 356 | 357 | /*---------------------------------------------------------------------------*/ 358 | 359 | /* Binary partition for substrings. */ 360 | static INLINE 361 | int * 362 | ss_partition(const int *PA, 363 | int *first, int *last, int depth) { 364 | int *a, *b; 365 | int t; 366 | for(a = first - 1, b = last;;) { 367 | for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; } 368 | for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { } 369 | if(b <= a) { break; } 370 | t = ~*b; 371 | *b = *a; 372 | *a = t; 373 | } 374 | if(first < a) { *first = ~*first; } 375 | return a; 376 | } 377 | 378 | /* Multikey introsort for medium size groups. 
*/
/* Sorts the entries of [first, last) by the bytes found at
   T[depth + PA[entry]] and beyond, without recursion: pending sub-ranges
   are kept on a local stack as (first, last, depth, limit) and the
   function returns when STACK_POP finds that stack empty.

   limit starts at ss_ilg(range length) and is decremented per partitioning
   round; when it reaches zero the range is heap-sorted on the current
   byte and then split into runs of equal bytes (the limit < 0 path).
   Ranges of at most SS_INSERTIONSORT_THRESHOLD entries are handed to
   ss_insertionsort. */
static
void
ss_mintrosort(const unsigned char *T, const int *PA,
              int *first, int *last,
              int depth) {
#define STACK_SIZE SS_MISORT_STACKSIZE
  struct { int *a, *b, c; int d; } stack[STACK_SIZE];
  const unsigned char *Td;
  int *a, *b, *c, *d, *e, *f;
  int s, t;
  int ssize;
  int limit;
  int v, x = 0;

  for(ssize = 0, limit = ss_ilg(last - first);;) {

    if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
#if 1 < SS_INSERTIONSORT_THRESHOLD
      if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
#endif
      STACK_POP(first, last, depth, limit);
      continue;
    }

    /* Td[PA[k]] is the byte of entry k at the current depth */
    Td = T + depth;
    if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
    if(limit < 0) {
      /* The range is sorted on the current byte: find the first run of
         at least two equal bytes, skipping single entries by advancing
         first. On exit [first, a) is that run. */
      for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
        if((x = Td[PA[*a]]) != v) {
          if(1 < (a - first)) { break; }
          v = x;
          first = a;
        }
      }
      /* the byte one position before the current depth decides whether
         the run is passed through ss_partition */
      if(Td[PA[*first] - 1] < v) {
        first = ss_partition(PA, first, a, depth);
      }
      /* continue with the smaller of run and remainder, stack the other;
         the run goes one byte deeper, the remainder keeps limit = -1 */
      if((a - first) <= (last - a)) {
        if(1 < (a - first)) {
          STACK_PUSH(a, last, depth, -1);
          last = a, depth += 1, limit = ss_ilg(a - first);
        } else {
          first = a, limit = -1;
        }
      } else {
        if(1 < (last - a)) {
          STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
          first = a, limit = -1;
        } else {
          last = a, depth += 1, limit = ss_ilg(a - first);
        }
      }
      continue;
    }

    /* choose pivot */
    a = ss_pivot(Td, PA, first, last);
    v = Td[PA[*a]];
    SWAP(*first, *a);

    /* partition */
    /* Three-way split on v. Entries equal to v are first collected at the
       left end (up to a) and at the right end (after d) while b and c scan
       towards each other. */
    for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
    if(((a = b) < last) && (x < v)) {
      for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
        if(x == v) { SWAP(*b, *a); ++a; }
      }
    }
    for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
    if((b < (d = c)) && (x > v)) {
      for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
        if(x == v) { SWAP(*c, *d); --d; }
      }
    }
    for(; b < c;) {
      SWAP(*b, *c);
      for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
        if(x == v) { SWAP(*b, *a); ++a; }
      }
      for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
        if(x == v) { SWAP(*c, *d); --d; }
      }
    }

    if(a <= d) {
      /* At least one entry differs from v: swap the two groups of equal
         entries from the ends into the middle. */
      c = b - 1;

      if((s = a - first) > (t = b - a)) { s = t; }
      for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
      if((s = d - c) > (t = last - d - 1)) { s = t; }
      for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }

      /* now [first, a) < v, [a, c) == v, [c, last) > v */
      a = first + (b - a), c = last - (d - c);
      b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);

      /* Three sub-ranges remain: [first, a) and [c, last) at the same
         depth and limit, and [b, c) one byte deeper with a fresh limit.
         The smallest is processed next; the other two are stacked with
         the largest pushed first. */
      if((a - first) <= (last - c)) {
        if((last - c) <= (c - b)) {
          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
          STACK_PUSH(c, last, depth, limit);
          last = a;
        } else if((a - first) <= (c - b)) {
          STACK_PUSH(c, last, depth, limit);
          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
          last = a;
        } else {
          STACK_PUSH(c, last, depth, limit);
          STACK_PUSH(first, a, depth, limit);
          first = b, last = c, depth += 1, limit = ss_ilg(c - b);
        }
      } else {
        if((a - first) <= (c - b)) {
          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
          STACK_PUSH(first, a, depth, limit);
          first = c;
        } else if((last - c) <= (c - b)) {
          STACK_PUSH(first, a, depth, limit);
          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
          first = c;
        } else {
          STACK_PUSH(first, a, depth, limit);
          STACK_PUSH(c, last, depth, limit);
          first = b, last = c, depth += 1, limit = ss_ilg(c - b);
        }
      }
    } else {
      /* Every entry equals v at this depth: give back the limit that was
         consumed above and move one byte deeper. */
      limit += 1;
      if(Td[PA[*first] - 1] < v) {
        first = ss_partition(PA, first, last, depth);
        limit = ss_ilg(last - first);
      }
      depth += 1;
    }
  }
#undef STACK_SIZE
}
#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */


/*---------------------------------------------------------------------------*/

#if SS_BLOCKSIZE != 0

/* Exchanges the n ints starting at a with the n ints starting at b.
   Does nothing when n <= 0. */
static INLINE
void
ss_blockswap(int *a, int *b, int n) {
  int t;
  for(; 0 < n; --n, ++a, ++b) {
    t = *a, *a = *b, *b = t;
  }
}

/* In-place exchange of the adjacent runs [first, middle) and
   [middle, last) using one temporary; ss_inplacemerge relies on the right
   run ending up in front of the left one. l and r track the remaining
   lengths of the two runs; equal lengths are finished with a single
   ss_blockswap. NOTE(review): the element-level behaviour of the two
   do/while loops has not been traced here - confirm before changing. */
static INLINE
void
ss_rotate(int *first, int *middle, int *last) {
  int *a, *b, t;
  int l, r;
  l = middle - first, r = last - middle;
  for(; (0 < l) && (0 < r);) {
    if(l == r) { ss_blockswap(first, middle, l); break; }
    if(l < r) {
      /* left run is shorter: work backwards from the end */
      a = last - 1, b = middle - 1;
      t = *a;
      do {
        *a-- = *b, *b-- = *a;
        if(b < first) {
          *a = t;
          last = a;
          if((r -= l + 1) <= l) { break; }
          a -= 1, b = middle - 1;
          t = *a;
        }
      } while(1);
    } else {
      /* right run is shorter (or equal handled above): work forwards */
      a = first, b = middle;
      t = *a;
      do {
        *a++ = *b, *b++ = *a;
        if(last <= b) {
          *a = t;
          first = a + 1;
          if((l -= r + 1) <= r) { break; }
          a += 1, b = middle;
          t = *a;
        }
      } while(1);
    }
  }
}


/*---------------------------------------------------------------------------*/

/* Merges the sorted runs [first, middle) and [middle, last) without a
   buffer. Each round takes the last entry of the right run (decoding it
   with ~ if it is stored negative), binary-searches the left run for the
   first entry that does not compare below it, and rotates the tail of the
   left run behind the right run. An entry of the left run that compares
   equal (r == 0) is stored complemented. The loop ends when either run is
   used up. */
static
void
ss_inplacemerge(const unsigned char *T, const int *PA,
                int *first, int *middle, int *last,
                int depth) {
  const int *p;
  int *a, *b;
  int len, half;
  int q, r;
  int x;

  for(;;) {
    /* x remembers whether the last entry was stored complemented */
    if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
    else { x = 0; p = PA + *(last - 1); }
    /* binary search in [first, middle); r keeps the last non-negative
       comparison result seen */
    for(a = first, len = middle - first, half = len >> 1, r = -1;
        0 < len;
        len = half, half >>= 1) {
      b = a + half;
      q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
      if(q < 0) {
        a = b + 1;
        half -= (len & 1) ^ 1;
      } else {
        r = q;
      }
    }
    if(a < middle) {
      if(r == 0) { *a = ~*a; }
      ss_rotate(a, middle, last);
      last -= middle - a;
      middle = a;
      if(first == middle) { break; }
    }
    --last;
    /* also step over the negative entries preceding a complemented one */
    if(x != 0) { while(*--last < 0) { } }
    if(middle == last) { break; }
  }
}


/*---------------------------------------------------------------------------*/

/* Merge-forward with internal buffer. */
/* Merges [first, middle) with [middle, last) from the front. The left run
   is first exchanged with buf (which must hold middle - first ints), then
   entries are taken from buf (b) and from the right run (c) and written at
   a. Every write is paired with a write back into the source slot, so the
   ints originally in buf are preserved in buf on return; t carries the one
   value displaced at the start. Negative entries are copied along with the
   entry before them. When two entries compare equal, the one from the
   right run is stored complemented. */
static
void
ss_mergeforward(const unsigned char *T, const int *PA,
                int *first, int *middle, int *last,
                int *buf, int depth) {
  int *a, *b, *c, *bufend;
  int t;
  int r;

  bufend = buf + (middle - first) - 1;
  ss_blockswap(buf, first, middle - first);

  for(t = *(a = first), b = buf, c = middle;;) {
    r = ss_compare(T, PA + *b, PA + *c, depth);
    if(r < 0) {
      /* buffered (left) entry goes first */
      do {
        *a++ = *b;
        if(bufend <= b) { *bufend = t; return; }
        *b++ = *a;
      } while(*b < 0);
    } else if(r > 0) {
      /* right entry goes first */
      do {
        *a++ = *c, *c++ = *a;
        if(last <= c) {
          /* right run exhausted: flush what is left in the buffer */
          while(b < bufend) { *a++ = *b, *b++ = *a; }
          *a = *b, *b = t;
          return;
        }
      } while(*c < 0);
    } else {
      /* equal: mark the right entry, then emit left followed by right */
      *c = ~*c;
      do {
        *a++ = *b;
        if(bufend <= b) { *bufend = t; return; }
        *b++ = *a;
      } while(*b < 0);

      do {
        *a++ = *c, *c++ = *a;
        if(last <= c) {
          while(b < bufend) { *a++ = *b, *b++ = *a; }
          *a = *b, *b = t;
          return;
        }
      } while(*c < 0);
    }
  }
}

/* Merge-backward with internal buffer.
*/
/* Merges [first, middle) with [middle, last) from the back. The right run
   is first exchanged with buf (which must hold last - middle ints), then
   entries are taken from the end of buf (b) and from the end of the left
   run (c) and written at a, moving downwards. As in the forward merge,
   each write is paired with a write back into the source slot and t
   carries the one displaced value.

   p1 / p2 point at the PA entries of the current buffer / left-run
   candidates. Bit 1 of x is set while the buffer candidate was stored
   complemented, bit 2 likewise for the left-run candidate; a set bit makes
   the next copy from that side first move the negative entries belonging
   to the candidate. When both candidates compare equal, the buffer entry
   is written complemented. */
static
void
ss_mergebackward(const unsigned char *T, const int *PA,
                 int *first, int *middle, int *last,
                 int *buf, int depth) {
  const int *p1, *p2;
  int *a, *b, *c, *bufend;
  int t;
  int r;
  int x;

  bufend = buf + (last - middle) - 1;
  ss_blockswap(buf, middle, last - middle);

  x = 0;
  if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; }
  else { p1 = PA + *bufend; }
  if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
  else { p2 = PA + *(middle - 1); }
  for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
    r = ss_compare(T, p1, p2, depth);
    if(0 < r) {
      /* buffered (right) candidate is larger: emit it */
      if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
      *a-- = *b;
      if(b <= buf) { *buf = t; break; }
      *b-- = *a;
      if(*b < 0) { p1 = PA + ~*b; x |= 1; }
      else { p1 = PA + *b; }
    } else if(r < 0) {
      /* left candidate is larger: emit it */
      if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
      *a-- = *c, *c-- = *a;
      if(c < first) {
        /* left run exhausted: flush what is left in the buffer */
        while(buf < b) { *a-- = *b, *b-- = *a; }
        *a = *b, *b = t;
        break;
      }
      if(*c < 0) { p2 = PA + ~*c; x |= 2; }
      else { p2 = PA + *c; }
    } else {
      /* equal: emit the buffer entry complemented, then the left entry */
      if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
      *a-- = ~*b;
      if(b <= buf) { *buf = t; break; }
      *b-- = *a;
      if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
      *a-- = *c, *c-- = *a;
      if(c < first) {
        while(buf < b) { *a-- = *b, *b-- = *a; }
        *a = *b, *b = t;
        break;
      }
      if(*b < 0) { p1 = PA + ~*b; x |= 1; }
      else { p1 = PA + *b; }
      if(*c < 0) { p2 = PA + ~*c; x |= 2; }
      else { p2 = PA + *c; }
    }
  }
}

/* D&C based merge.
*/
/* Merges the sorted runs [first, middle) and [middle, last) with a buffer
   of bufsize ints. When the right run fits into the buffer it is merged
   with ss_mergebackward, when the left run fits with ss_mergeforward.
   Otherwise a binary search finds m, the number of entries to exchange
   across middle; the two m-sized blocks are swapped and the two resulting
   sub-problems are handled one after the other, the larger one being kept
   on a local stack. The function returns when STACK_POP finds the stack
   empty.

   GETIDX(a) decodes an entry that may be stored complemented.
   MERGE_CHECK(a, b, c) applies the pending fix-ups encoded in c:
     bit 1: complement *a unconditionally;
     bit 2: complement *a if it compares equal to the entry before it;
     bit 4: complement *b if it compares equal to the entry before it.
   GETIDX and MERGE_CHECK stay defined after this function; only
   STACK_SIZE is undefined again. */
static
void
ss_swapmerge(const unsigned char *T, const int *PA,
             int *first, int *middle, int *last,
             int *buf, int bufsize, int depth) {
#define STACK_SIZE SS_SMERGE_STACKSIZE
#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
#define MERGE_CHECK(a, b, c)\
  do {\
    if(((c) & 1) ||\
       (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
      *(a) = ~*(a);\
    }\
    if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
      *(b) = ~*(b);\
    }\
  } while(0)
  struct { int *a, *b, *c; int d; } stack[STACK_SIZE];
  int *l, *r, *lm, *rm;
  int m, len, half;
  int ssize;
  int check, next;

  for(check = 0, ssize = 0;;) {
    if((last - middle) <= bufsize) {
      if((first < middle) && (middle < last)) {
        ss_mergebackward(T, PA, first, middle, last, buf, depth);
      }
      MERGE_CHECK(first, last, check);
      STACK_POP(first, middle, last, check);
      continue;
    }

    if((middle - first) <= bufsize) {
      if(first < middle) {
        ss_mergeforward(T, PA, first, middle, last, buf, depth);
      }
      MERGE_CHECK(first, last, check);
      STACK_POP(first, middle, last, check);
      continue;
    }

    /* binary search for the number m of entries around middle that are
       out of order with their mirror position on the other side */
    for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
        0 < len;
        len = half, half >>= 1) {
      if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
                       PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
        m += half + 1;
        half -= (len & 1) ^ 1;
      }
    }

    if(0 < m) {
      lm = middle - m, rm = middle + m;
      ss_blockswap(lm, middle, m);
      l = r = middle, next = 0;
      /* move l / r off complemented entries and record in next which
         fix-ups the two sub-merges will need */
      if(rm < last) {
        if(*rm < 0) {
          *rm = ~*rm;
          if(first < lm) { for(; *--l < 0;) { } next |= 4; }
          next |= 1;
        } else if(first < lm) {
          for(; *r < 0; ++r) { }
          next |= 2;
        }
      }

      /* continue with the smaller half, stack the larger one */
      if((l - first) <= (last - r)) {
        STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
        middle = lm, last = l, check = (check & 3) | (next & 4);
      } else {
        if((next & 2) && (r == middle)) { next ^= 6; }
        STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
        first = r, middle = rm, check = (next & 3) | (check & 4);
      }
    } else {
      /* nothing to exchange: the runs are already in order */
      if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
        *middle = ~*middle;
      }
      MERGE_CHECK(first, last, check);
      STACK_POP(first, middle, last, check);
    }
  }
#undef STACK_SIZE
}

#endif /* SS_BLOCKSIZE != 0 */


/*---------------------------------------------------------------------------*/

/* Substring sort */
/* Sorts the entries of [first, last) using ss_compare order at `depth`.

   With SS_BLOCKSIZE == 0 the whole range goes to ss_mintrosort.
   Otherwise the range is cut into blocks of SS_BLOCKSIZE entries; each
   block is sorted and then merged with its predecessors following the set
   bits of the block counter i, so runs double in size like carries in a
   binary counter. If the supplied buffer is smaller than SS_BLOCKSIZE,
   than the range, and than ss_isqrt(range length), the last `limit`
   entries of the range are used as merge buffer instead; that tail is
   sorted afterwards and joined with ss_inplacemerge.

   When lastsuffix != 0, the entry at the original `first` is left out of
   the sort and inserted at the end by a linear scan. For that comparison a
   temporary pair PAi = { PA[entry], n - 2 } stands in for PA, because
   ss_compare reads the element following the one it is given. */
static
void
sssort(const unsigned char *T, const int *PA,
       int *first, int *last,
       int *buf, int bufsize,
       int depth, int n, int lastsuffix) {
  int *a;
#if SS_BLOCKSIZE != 0
  int *b, *middle, *curbuf;
  int j, k, curbufsize, limit;
#endif
  int i;

  if(lastsuffix != 0) { ++first; }

#if SS_BLOCKSIZE == 0
  ss_mintrosort(T, PA, first, last, depth);
#else
  if((bufsize < SS_BLOCKSIZE) &&
      (bufsize < (last - first)) &&
      (bufsize < (limit = ss_isqrt(last - first)))) {
    /* borrow the tail of the range as buffer */
    if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
    buf = middle = last - limit, bufsize = limit;
  } else {
    middle = last, limit = 0;
  }
  for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
    ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
#elif 1 < SS_BLOCKSIZE
    ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
#endif
    /* the still unsorted space behind this block may serve as buffer if
       it is larger than the regular one */
    curbufsize = last - (a + SS_BLOCKSIZE);
    curbuf = a + SS_BLOCKSIZE;
    if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
    /* one merge per trailing 1 bit of the block index */
    for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
      ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
    }
  }
  /* sort the final, possibly shorter block */
#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
  ss_mintrosort(T, PA, a, middle, depth);
#elif 1 < SS_BLOCKSIZE
  ss_insertionsort(T, PA, a, middle, depth);
#endif
  /* merge the remaining runs, smallest first */
  for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
    if(i & 1) {
      ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
      a -= k;
    }
  }
  if(limit != 0) {
    /* the borrowed tail still has to be sorted and merged in */
#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
    ss_mintrosort(T, PA, middle, last, depth);
#elif 1 < SS_BLOCKSIZE
    ss_insertionsort(T, PA, middle, last, depth);
#endif
    ss_inplacemerge(T, PA, first, middle, last, depth);
  }
#endif

  if(lastsuffix != 0) {
    /* Insert last type B* suffix. */
    int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
    for(a = first, i = *(first - 1);
        (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
        ++a) {
      *(a - 1) = *a;
    }
    *(a - 1) = i;
  }
}


/*---------------------------------------------------------------------------*/

/* Integer log2 of n via lg_table, considering all four bytes of n.
   For n == 0 the table yields -1. */
static INLINE
int
tr_ilg(int n) {
  return (n & 0xffff0000) ?
          ((n & 0xff000000) ?
            24 + lg_table[(n >> 24) & 0xff] :
            16 + lg_table[(n >> 16) & 0xff]) :
          ((n & 0x0000ff00) ?
             8 + lg_table[(n >> 8) & 0xff] :
             0 + lg_table[(n >> 0) & 0xff]);
}


/*---------------------------------------------------------------------------*/

/* Simple insertionsort for small size groups.
*/ 906 | static 907 | void 908 | tr_insertionsort(const int *ISAd, int *first, int *last) { 909 | int *a, *b; 910 | int t, r; 911 | 912 | for(a = first + 1; a < last; ++a) { 913 | for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) { 914 | do { *(b + 1) = *b; } while((first <= --b) && (*b < 0)); 915 | if(b < first) { break; } 916 | } 917 | if(r == 0) { *b = ~*b; } 918 | *(b + 1) = t; 919 | } 920 | } 921 | 922 | 923 | /*---------------------------------------------------------------------------*/ 924 | 925 | static INLINE 926 | void 927 | tr_fixdown(const int *ISAd, int *SA, int i, int size) { 928 | int j, k; 929 | int v; 930 | int c, d, e; 931 | 932 | for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { 933 | d = ISAd[SA[k = j++]]; 934 | if(d < (e = ISAd[SA[j]])) { k = j; d = e; } 935 | if(d <= c) { break; } 936 | } 937 | SA[i] = v; 938 | } 939 | 940 | /* Simple top-down heapsort. */ 941 | static 942 | void 943 | tr_heapsort(const int *ISAd, int *SA, int size) { 944 | int i, m; 945 | int t; 946 | 947 | m = size; 948 | if((size % 2) == 0) { 949 | m--; 950 | if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); } 951 | } 952 | 953 | for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); } 954 | if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); } 955 | for(i = m - 1; 0 < i; --i) { 956 | t = SA[0], SA[0] = SA[i]; 957 | tr_fixdown(ISAd, SA, 0, i); 958 | SA[i] = t; 959 | } 960 | } 961 | 962 | 963 | /*---------------------------------------------------------------------------*/ 964 | 965 | /* Returns the median of three elements. */ 966 | static INLINE 967 | int * 968 | tr_median3(const int *ISAd, int *v1, int *v2, int *v3) { 969 | int *t; 970 | if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); } 971 | if(ISAd[*v2] > ISAd[*v3]) { 972 | if(ISAd[*v1] > ISAd[*v3]) { return v1; } 973 | else { return v3; } 974 | } 975 | return v2; 976 | } 977 | 978 | /* Returns the median of five elements. 
*/ 979 | static INLINE 980 | int * 981 | tr_median5(const int *ISAd, 982 | int *v1, int *v2, int *v3, int *v4, int *v5) { 983 | int *t; /* not referenced directly; presumably scratch for the SWAP macro defined elsewhere -- confirm */ 984 | if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); } 985 | if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); } 986 | if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); } 987 | if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); } 988 | if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); } 989 | if(ISAd[*v3] > ISAd[*v4]) { return v4; } 990 | return v3; 991 | } 992 | 993 | /* Returns the pivot element. Ranges of at most 32 elements use the median of (first, middle, last - 1); ranges of at most 512 use a median of five taken at quarter steps; larger ranges use the median of three medians-of-three sampled near the start, the middle and the end. */ 994 | static INLINE 995 | int * 996 | tr_pivot(const int *ISAd, int *first, int *last) { 997 | int *middle; 998 | int t; 999 | 1000 | t = last - first; 1001 | middle = first + t / 2; 1002 | 1003 | if(t <= 512) { 1004 | if(t <= 32) { 1005 | return tr_median3(ISAd, first, middle, last - 1); 1006 | } else { 1007 | t >>= 2; 1008 | return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1); 1009 | } 1010 | } 1011 | t >>= 3; 1012 | first = tr_median3(ISAd, first, first + t, first + (t << 1)); 1013 | middle = tr_median3(ISAd, middle - t, middle, middle + t); 1014 | last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1); 1015 | return tr_median3(ISAd, first, middle, last); 1016 | } 1017 | 1018 | 1019 | /*---------------------------------------------------------------------------*/ 1020 | 1021 | /* Work budget consulted by tr_introsort: `remain` is the allowance still available, `incval` is what one spent `chance` adds to it, and `count` accumulates the sizes that trbudget_check refused. */ typedef struct _trbudget_t trbudget_t; 1022 | struct _trbudget_t { 1023 | int chance; 1024 | int remain; 1025 | int incval; 1026 | int count; 1027 | }; 1028 | 1029 | /* Sets `chance` and starts `remain` at `incval`. `count` is not initialised here; trsort zeroes it before every tr_introsort call. */ static INLINE 1030 | void 1031 | trbudget_init(trbudget_t *budget, int chance, int incval) { 1032 | budget->chance = chance; 1033 | budget->remain = budget->incval = incval; 1034 | } 1035 | 1036 | /* Returns 1 when `size` units may be spent (from `remain`, or by using up one `chance` to top `remain` up by `incval`); otherwise adds `size` to `count` and returns 0. */ static INLINE 1037 | int 1038 | trbudget_check(trbudget_t *budget, int size) { 1039 | if(size <= budget->remain) { budget->remain -= size; return 1; } 1040 | if(budget->chance == 0) { budget->count += size; return 0; } 1041 | budget->remain += budget->incval - size; 1042 | 
budget->chance -= 1; 1043 | return 1; 1044 | } 1045 | 1046 | 1047 | /*---------------------------------------------------------------------------*/ 1048 | 1049 | /* Partitions [first, last) around the key v using ISAd[*p], scanning from `middle`. Entries equal to v are first parked at the two ends (a grows from the left, d shrinks from the right) and then swapped into the centre; *pa and *pb receive the resulting bounds of the equal run. When a > d no swap-back happens and first/last are returned unchanged. */ static INLINE 1050 | void 1051 | tr_partition(const int *ISAd, 1052 | int *first, int *middle, int *last, 1053 | int **pa, int **pb, int v) { 1054 | int *a, *b, *c, *d, *e, *f; 1055 | int t, s; 1056 | int x = 0; 1057 | 1058 | for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { } 1059 | if(((a = b) < last) && (x < v)) { 1060 | for(; (++b < last) && ((x = ISAd[*b]) <= v);) { 1061 | if(x == v) { SWAP(*b, *a); ++a; } 1062 | } 1063 | } 1064 | for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { } 1065 | if((b < (d = c)) && (x > v)) { 1066 | for(; (b < --c) && ((x = ISAd[*c]) >= v);) { 1067 | if(x == v) { SWAP(*c, *d); --d; } 1068 | } 1069 | } 1070 | for(; b < c;) { 1071 | SWAP(*b, *c); 1072 | for(; (++b < c) && ((x = ISAd[*b]) <= v);) { 1073 | if(x == v) { SWAP(*b, *a); ++a; } 1074 | } 1075 | for(; (b < --c) && ((x = ISAd[*c]) >= v);) { 1076 | if(x == v) { SWAP(*c, *d); --d; } 1077 | } 1078 | } 1079 | 1080 | if(a <= d) { 1081 | c = b - 1; 1082 | if((s = a - first) > (t = b - a)) { s = t; } 1083 | for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } 1084 | if((s = d - c) > (t = last - d - 1)) { s = t; } 1085 | for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } 1086 | first += (b - a), last -= (d - c); 1087 | } 1088 | *pa = first, *pb = last; 1089 | } 1090 | 1091 | static 1092 | void 1093 | tr_copy(int *ISA, const int *SA, 1094 | int *first, int *a, int *b, int *last, 1095 | int depth) { 1096 | /* sort suffixes of middle partition 1097 | by using sorted order of suffixes of left and right partition. 
*/ 1098 | int *c, *d, *e; 1099 | int s, v; 1100 | 1101 | v = b - SA - 1; 1102 | for(c = first, d = a - 1; c <= d; ++c) { 1103 | if((0 <= (s = *c - depth)) && (ISA[s] == v)) { 1104 | *++d = s; 1105 | ISA[s] = d - SA; 1106 | } 1107 | } 1108 | for(c = last - 1, e = d + 1, d = b; e < d; --c) { 1109 | if((0 <= (s = *c - depth)) && (ISA[s] == v)) { 1110 | *--d = s; 1111 | ISA[s] = d - SA; 1112 | } 1113 | } 1114 | } 1115 | 1116 | /* Same two scans as tr_copy, but instead of giving every copied entry its own position as rank, entries whose ISA[s + depth] values form a run share the rank of the first entry of that run; the middle loop re-ranks [first, d] the same way. */ static 1117 | void 1118 | tr_partialcopy(int *ISA, const int *SA, 1119 | int *first, int *a, int *b, int *last, 1120 | int depth) { 1121 | int *c, *d, *e; 1122 | int s, v; 1123 | int rank, lastrank, newrank = -1; 1124 | 1125 | v = b - SA - 1; 1126 | lastrank = -1; 1127 | for(c = first, d = a - 1; c <= d; ++c) { 1128 | if((0 <= (s = *c - depth)) && (ISA[s] == v)) { 1129 | *++d = s; 1130 | rank = ISA[s + depth]; 1131 | if(lastrank != rank) { lastrank = rank; newrank = d - SA; } 1132 | ISA[s] = newrank; 1133 | } 1134 | } 1135 | 1136 | lastrank = -1; 1137 | for(e = d; first <= e; --e) { 1138 | rank = ISA[*e]; 1139 | if(lastrank != rank) { lastrank = rank; newrank = e - SA; } 1140 | if(newrank != rank) { ISA[*e] = newrank; } 1141 | } 1142 | 1143 | lastrank = -1; 1144 | for(c = last - 1, e = d + 1, d = b; e < d; --c) { 1145 | if((0 <= (s = *c - depth)) && (ISA[s] == v)) { 1146 | *--d = s; 1147 | rank = ISA[s + depth]; 1148 | if(lastrank != rank) { lastrank = rank; newrank = d - SA; } 1149 | ISA[s] = newrank; 1150 | } 1151 | } 1152 | } 1153 | 1154 | /* Sorts [first, last) by ISAd key using an explicit stack instead of recursion. A non-negative `limit` is the remaining depth allowance (heapsort is used when it reaches 0, insertion sort for ranges of at most TR_INSERTIONSORT_THRESHOLD). Negative values select special states: -1 tandem repeat partition, -2 tandem repeat copy, any other negative value (-3 is what this function assigns) a sorted partition. Work refused by trbudget_check is left unsorted and recorded in budget->count. */ static 1155 | void 1156 | tr_introsort(int *ISA, const int *ISAd, 1157 | int *SA, int *first, int *last, 1158 | trbudget_t *budget) { 1159 | #define STACK_SIZE TR_STACKSIZE 1160 | struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE]; 1161 | int *a, *b, *c; 1162 | int t; 1163 | int v, x = 0; 1164 | int incr = ISAd - ISA; 1165 | int limit, next; 1166 | int ssize, trlink = -1; 1167 | 1168 | for(ssize = 0, limit = tr_ilg(last - first);;) { 1169 | 1170 | if(limit < 0) { 1171 | if(limit == -1) { 1172 | 
/* tandem repeat partition */ 1173 | tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1); 1174 | 1175 | /* update ranks */ 1176 | if(a < last) { 1177 | for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } 1178 | } 1179 | if(b < last) { 1180 | for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } 1181 | } 1182 | 1183 | /* push */ 1184 | if(1 < (b - a)) { 1185 | STACK_PUSH5(NULL, a, b, 0, 0); 1186 | STACK_PUSH5(ISAd - incr, first, last, -2, trlink); 1187 | trlink = ssize - 2; 1188 | } 1189 | if((a - first) <= (last - b)) { 1190 | if(1 < (a - first)) { 1191 | STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink); 1192 | last = a, limit = tr_ilg(a - first); 1193 | } else if(1 < (last - b)) { 1194 | first = b, limit = tr_ilg(last - b); 1195 | } else { 1196 | STACK_POP5(ISAd, first, last, limit, trlink); 1197 | } 1198 | } else { 1199 | if(1 < (last - b)) { 1200 | STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink); 1201 | first = b, limit = tr_ilg(last - b); 1202 | } else if(1 < (a - first)) { 1203 | last = a, limit = tr_ilg(a - first); 1204 | } else { 1205 | STACK_POP5(ISAd, first, last, limit, trlink); 1206 | } 1207 | } 1208 | } else if(limit == -2) { 1209 | /* tandem repeat copy */ 1210 | a = stack[--ssize].b, b = stack[ssize].c; 1211 | if(stack[ssize].d == 0) { 1212 | tr_copy(ISA, SA, first, a, b, last, ISAd - ISA); 1213 | } else { 1214 | if(0 <= trlink) { stack[trlink].d = -1; } 1215 | tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA); 1216 | } 1217 | STACK_POP5(ISAd, first, last, limit, trlink); 1218 | } else { 1219 | /* sorted partition */ 1220 | if(0 <= *first) { 1221 | a = first; 1222 | do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a)); 1223 | first = a; 1224 | } 1225 | if(first < last) { 1226 | a = first; do { *a = ~*a; } while(*++a < 0); 1227 | next = (ISA[*a] != ISAd[*a]) ? 
tr_ilg(a - first + 1) : -1; 1228 | if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } } 1229 | 1230 | /* push */ 1231 | if(trbudget_check(budget, a - first)) { 1232 | if((a - first) <= (last - a)) { 1233 | STACK_PUSH5(ISAd, a, last, -3, trlink); 1234 | ISAd += incr, last = a, limit = next; 1235 | } else { 1236 | if(1 < (last - a)) { 1237 | STACK_PUSH5(ISAd + incr, first, a, next, trlink); 1238 | first = a, limit = -3; 1239 | } else { 1240 | ISAd += incr, last = a, limit = next; 1241 | } 1242 | } 1243 | } else { 1244 | if(0 <= trlink) { stack[trlink].d = -1; } 1245 | if(1 < (last - a)) { 1246 | first = a, limit = -3; 1247 | } else { 1248 | STACK_POP5(ISAd, first, last, limit, trlink); 1249 | } 1250 | } 1251 | } else { 1252 | STACK_POP5(ISAd, first, last, limit, trlink); 1253 | } 1254 | } 1255 | continue; 1256 | } 1257 | 1258 | if((last - first) <= TR_INSERTIONSORT_THRESHOLD) { 1259 | tr_insertionsort(ISAd, first, last); 1260 | limit = -3; 1261 | continue; 1262 | } 1263 | 1264 | if(limit-- == 0) { 1265 | tr_heapsort(ISAd, first, last - first); 1266 | for(a = last - 1; first < a; a = b) { 1267 | for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; } 1268 | } 1269 | limit = -3; 1270 | continue; 1271 | } 1272 | 1273 | /* choose pivot */ 1274 | a = tr_pivot(ISAd, first, last); 1275 | SWAP(*first, *a); 1276 | v = ISAd[*first]; 1277 | 1278 | /* partition */ 1279 | tr_partition(ISAd, first, first + 1, last, &a, &b, v); 1280 | if((last - first) != (b - a)) { 1281 | next = (ISA[*a] != v) ? 
tr_ilg(b - a) : -1; 1282 | 1283 | /* update ranks */ 1284 | for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } 1285 | if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } } 1286 | 1287 | /* push */ 1288 | if((1 < (b - a)) && (trbudget_check(budget, b - a))) { 1289 | if((a - first) <= (last - b)) { 1290 | if((last - b) <= (b - a)) { 1291 | if(1 < (a - first)) { 1292 | STACK_PUSH5(ISAd + incr, a, b, next, trlink); 1293 | STACK_PUSH5(ISAd, b, last, limit, trlink); 1294 | last = a; 1295 | } else if(1 < (last - b)) { 1296 | STACK_PUSH5(ISAd + incr, a, b, next, trlink); 1297 | first = b; 1298 | } else { 1299 | ISAd += incr, first = a, last = b, limit = next; 1300 | } 1301 | } else if((a - first) <= (b - a)) { 1302 | if(1 < (a - first)) { 1303 | STACK_PUSH5(ISAd, b, last, limit, trlink); 1304 | STACK_PUSH5(ISAd + incr, a, b, next, trlink); 1305 | last = a; 1306 | } else { 1307 | STACK_PUSH5(ISAd, b, last, limit, trlink); 1308 | ISAd += incr, first = a, last = b, limit = next; 1309 | } 1310 | } else { 1311 | STACK_PUSH5(ISAd, b, last, limit, trlink); 1312 | STACK_PUSH5(ISAd, first, a, limit, trlink); 1313 | ISAd += incr, first = a, last = b, limit = next; 1314 | } 1315 | } else { 1316 | if((a - first) <= (b - a)) { 1317 | if(1 < (last - b)) { 1318 | STACK_PUSH5(ISAd + incr, a, b, next, trlink); 1319 | STACK_PUSH5(ISAd, first, a, limit, trlink); 1320 | first = b; 1321 | } else if(1 < (a - first)) { 1322 | STACK_PUSH5(ISAd + incr, a, b, next, trlink); 1323 | last = a; 1324 | } else { 1325 | ISAd += incr, first = a, last = b, limit = next; 1326 | } 1327 | } else if((last - b) <= (b - a)) { 1328 | if(1 < (last - b)) { 1329 | STACK_PUSH5(ISAd, first, a, limit, trlink); 1330 | STACK_PUSH5(ISAd + incr, a, b, next, trlink); 1331 | first = b; 1332 | } else { 1333 | STACK_PUSH5(ISAd, first, a, limit, trlink); 1334 | ISAd += incr, first = a, last = b, limit = next; 1335 | } 1336 | } else { 1337 | STACK_PUSH5(ISAd, first, a, limit, trlink); 1338 | 
STACK_PUSH5(ISAd, b, last, limit, trlink); 1339 | ISAd += incr, first = a, last = b, limit = next; 1340 | } 1341 | } 1342 | } else { 1343 | if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; } 1344 | if((a - first) <= (last - b)) { 1345 | if(1 < (a - first)) { 1346 | STACK_PUSH5(ISAd, b, last, limit, trlink); 1347 | last = a; 1348 | } else if(1 < (last - b)) { 1349 | first = b; 1350 | } else { 1351 | STACK_POP5(ISAd, first, last, limit, trlink); 1352 | } 1353 | } else { 1354 | if(1 < (last - b)) { 1355 | STACK_PUSH5(ISAd, first, a, limit, trlink); 1356 | first = b; 1357 | } else if(1 < (a - first)) { 1358 | last = a; 1359 | } else { 1360 | STACK_POP5(ISAd, first, last, limit, trlink); 1361 | } 1362 | } 1363 | } 1364 | } else { 1365 | if(trbudget_check(budget, last - first)) { 1366 | limit = tr_ilg(last - first), ISAd += incr; 1367 | } else { 1368 | if(0 <= trlink) { stack[trlink].d = -1; } 1369 | STACK_POP5(ISAd, first, last, limit, trlink); 1370 | } 1371 | } 1372 | } 1373 | #undef STACK_SIZE 1374 | } 1375 | 1376 | 1377 | 1378 | /*---------------------------------------------------------------------------*/ 1379 | 1380 | /* Tandem repeat sort */ 1381 | static 1382 | void 1383 | trsort(int *ISA, int *SA, int n, int depth) { 1384 | int *ISAd; 1385 | int *first, *last; 1386 | trbudget_t budget; 1387 | int t, skip, unsorted; 1388 | 1389 | trbudget_init(&budget, tr_ilg(n) * 2 / 3, n); 1390 | /* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */ 1391 | /* Each pass doubles the offset (ISAd - ISA); the loop ends when SA[0] reaches -n or a pass leaves nothing unsorted. Negative SA entries are skip lengths over already-sorted runs. */ for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) { 1392 | first = SA; 1393 | skip = 0; 1394 | unsorted = 0; 1395 | do { 1396 | if((t = *first) < 0) { first -= t; skip += t; } 1397 | else { 1398 | if(skip != 0) { *(first + skip) = skip; skip = 0; } 1399 | last = SA + ISA[t] + 1; 1400 | if(1 < (last - first)) { 1401 | budget.count = 0; 1402 | tr_introsort(ISA, ISAd, SA, first, last, &budget); 1403 | if(budget.count != 0) { unsorted += budget.count; } 1404 | else { skip = first - last; } 1405 | } else if((last - 
first) == 1) { 1406 | skip = -1; 1407 | } 1408 | first = last; 1409 | } 1410 | } while(first < (SA + n)); 1411 | if(skip != 0) { *(first + skip) = skip; } 1412 | if(unsorted == 0) { break; } 1413 | } 1414 | } 1415 | 1416 | 1417 | /*---------------------------------------------------------------------------*/ 1418 | 1419 | /* Sorts suffixes of type B*. */ 1420 | static 1421 | int 1422 | sort_typeBstar(const unsigned char *T, int *SA, 1423 | int *bucket_A, int *bucket_B, 1424 | int n) { 1425 | int *PAb, *ISAb, *buf; 1426 | #ifdef _OPENMP 1427 | int *curbuf; 1428 | int l; 1429 | #endif 1430 | int i, j, k, t, m, bufsize; 1431 | int c0, c1; 1432 | #ifdef _OPENMP 1433 | int d0, d1; 1434 | int tmp; 1435 | #endif 1436 | 1437 | /* Count the number of occurrences of the first one or two characters of each 1438 | type A, B and B* suffix. Moreover, store the beginning position of all 1439 | type B* suffixes into the array SA. */ 1440 | for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) { 1441 | /* type A suffix. */ 1442 | do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1)); 1443 | if(0 <= i) { 1444 | /* type B* suffix. */ 1445 | ++BUCKET_BSTAR(c0, c1); 1446 | SA[--m] = i; 1447 | /* type B suffix. */ 1448 | for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { 1449 | ++BUCKET_B(c0, c1); 1450 | } 1451 | } 1452 | } 1453 | m = n - m; 1454 | /* 1455 | note: 1456 | A type B* suffix is lexicographically smaller than a type B suffix that 1457 | begins with the same first two characters. 1458 | */ 1459 | 1460 | /* Calculate the index of start/end point of each bucket. 
*/ 1461 | for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) { 1462 | t = i + BUCKET_A(c0); 1463 | BUCKET_A(c0) = i + j; /* start point */ 1464 | i = t + BUCKET_B(c0, c0); 1465 | for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) { 1466 | j += BUCKET_BSTAR(c0, c1); 1467 | BUCKET_BSTAR(c0, c1) = j; /* end point */ 1468 | i += BUCKET_B(c0, c1); 1469 | } 1470 | } 1471 | 1472 | if(0 < m) { 1473 | /* Sort the type B* suffixes by their first two characters. */ 1474 | PAb = SA + n - m; ISAb = SA + m; 1475 | for(i = m - 2; 0 <= i; --i) { 1476 | t = PAb[i], c0 = T[t], c1 = T[t + 1]; 1477 | SA[--BUCKET_BSTAR(c0, c1)] = i; 1478 | } 1479 | t = PAb[m - 1], c0 = T[t], c1 = T[t + 1]; 1480 | SA[--BUCKET_BSTAR(c0, c1)] = m - 1; 1481 | 1482 | /* Sort the type B* substrings using sssort. */ 1483 | #ifdef _OPENMP 1484 | tmp = omp_get_max_threads(); 1485 | buf = SA + m, bufsize = (n - (2 * m)) / tmp; 1486 | c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; 1487 | #pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp) 1488 | { 1489 | tmp = omp_get_thread_num(); 1490 | curbuf = buf + tmp * bufsize; 1491 | k = 0; 1492 | for(;;) { 1493 | #pragma omp critical(sssort_lock) 1494 | { 1495 | if(0 < (l = j)) { 1496 | d0 = c0, d1 = c1; 1497 | do { 1498 | k = BUCKET_BSTAR(d0, d1); 1499 | if(--d1 <= d0) { 1500 | d1 = ALPHABET_SIZE - 1; 1501 | if(--d0 < 0) { break; } 1502 | } 1503 | } while(((l - k) <= 1) && (0 < (l = k))); 1504 | c0 = d0, c1 = d1, j = k; 1505 | } 1506 | } 1507 | if(l == 0) { break; } 1508 | sssort(T, PAb, SA + k, SA + l, 1509 | curbuf, bufsize, 2, n, *(SA + k) == (m - 1)); 1510 | } 1511 | } 1512 | #else 1513 | buf = SA + m, bufsize = n - (2 * m); 1514 | for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { 1515 | for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { 1516 | i = BUCKET_BSTAR(c0, c1); 1517 | if(1 < (j - i)) { 1518 | sssort(T, PAb, SA + i, SA + j, 1519 | buf, bufsize, 2, n, *(SA + i) == (m - 1)); 1520 | } 1521 | } 1522 | } 1523 | #endif 1524 | 1525 | /* 
Compute ranks of type B* substrings. */ 1526 | for(i = m - 1; 0 <= i; --i) { 1527 | if(0 <= SA[i]) { 1528 | j = i; 1529 | do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i])); 1530 | SA[i + 1] = i - j; 1531 | if(i <= 0) { break; } 1532 | } 1533 | j = i; 1534 | do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0); 1535 | ISAb[SA[i]] = j; 1536 | } 1537 | 1538 | /* Construct the inverse suffix array of type B* suffixes using trsort. */ 1539 | trsort(ISAb, SA, m, 1); 1540 | 1541 | /* Set the sorted order of type B* suffixes. */ 1542 | for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) { 1543 | for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { } 1544 | if(0 <= i) { 1545 | t = i; 1546 | for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { } 1547 | SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t; 1548 | } 1549 | } 1550 | 1551 | /* Calculate the index of start/end point of each bucket. */ 1552 | BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */ 1553 | for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) { 1554 | i = BUCKET_A(c0 + 1) - 1; 1555 | for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) { 1556 | t = i - BUCKET_B(c0, c1); 1557 | BUCKET_B(c0, c1) = i; /* end point */ 1558 | 1559 | /* Move all type B* suffixes to the correct position. */ 1560 | for(i = t, j = BUCKET_BSTAR(c0, c1); 1561 | j <= k; 1562 | --i, --k) { SA[i] = SA[k]; } 1563 | } 1564 | BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */ 1565 | BUCKET_B(c0, c0) = i; /* end point */ 1566 | } 1567 | } 1568 | 1569 | return m; 1570 | } 1571 | 1572 | /* Constructs the suffix array by using the sorted order of type B* suffixes. 
*/ 1573 | static 1574 | void 1575 | construct_SA(const unsigned char *T, int *SA, 1576 | int *bucket_A, int *bucket_B, 1577 | int n, int m) { 1578 | int *i, *j, *k; 1579 | int s; 1580 | int c0, c1, c2; 1581 | 1582 | if(0 < m) { 1583 | /* Construct the sorted order of type B suffixes by using 1584 | the sorted order of type B* suffixes. */ 1585 | for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { 1586 | /* Scan the suffix array from right to left. */ 1587 | for(i = SA + BUCKET_BSTAR(c1, c1 + 1), 1588 | j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; 1589 | i <= j; 1590 | --j) { 1591 | if(0 < (s = *j)) { 1592 | *j = ~s; 1593 | c0 = T[--s]; 1594 | if((0 < s) && (T[s - 1] > c0)) { s = ~s; } 1595 | if(c0 != c2) { 1596 | if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } 1597 | k = SA + BUCKET_B(c2 = c0, c1); 1598 | } 1599 | *k-- = s; 1600 | } else*j = ~s; 1601 | } 1602 | } 1603 | } 1604 | 1605 | /* Construct the suffix array by using 1606 | the sorted order of type B suffixes. */ 1607 | k = SA + BUCKET_A(c2 = T[n - 1]); 1608 | *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1); 1609 | /* Scan the suffix array from left to right. */ 1610 | for(i = SA, j = SA + n; i < j; ++i) { 1611 | if(0 < (s = *i)) { 1612 | c0 = T[--s]; 1613 | if((s == 0) || (T[s - 1] < c0)) { s = ~s; } 1614 | if(c0 != c2) { 1615 | BUCKET_A(c2) = k - SA; 1616 | k = SA + BUCKET_A(c2 = c0); 1617 | } 1618 | *k++ = s; 1619 | } else*i = ~s; 1620 | } 1621 | } 1622 | /*---------------------------------------------------------------------------*/ 1623 | 1624 | /*- Function -*/ 1625 | 1626 | int 1627 | divsufsort(const unsigned char *T, int *SA, int *bucket, int n) { 1628 | int *bucket_A=bucket, *bucket_B=bucket+BUCKET_A_SIZE; 1629 | int m; 1630 | int err = 0; 1631 | 1632 | /* Check arguments. 
*/ 1633 | if((T == NULL) || (SA == NULL) || (n < 0))return-1; 1634 | if(n == 0)return 0; 1635 | if(n == 1)return SA[0]=0; 1636 | if(n == 2){SA[m=T[0] 2 | 3 | void e8(uint8_t *in_buf, int32_t n) { 4 | int32_t i; 5 | int32_t *op; 6 | for(i=0; i= -i && *op < n-i) { 17 | *op += i; 18 | } else if ( *op >= n-i && *op < n ) { 19 | *op -= n; // to [-i,1] 20 | } 21 | i+=4; 22 | } 23 | } 24 | } 25 | 26 | void e8back(uint8_t *buf,int32_t n) { 27 | int32_t i; 28 | int32_t *op; 29 | for(i=0; i= -i && *op < 0) { 41 | *op += n; 42 | } else if ( *op >= 0 && *op < n ) { 43 | *op -= i; 44 | } 45 | i+=4; 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /lzoma.h: -------------------------------------------------------------------------------- 1 | #define AuthorID 0xA1Ef 2 | #define AlgoID "LZOM" 3 | #define Version 0x00 4 | 5 | #define BLOCK_STORED 0x80000000 6 | #define BLOCK_LAST 0x40000000 7 | 8 | #define HISTORY_SIZE(dict_size) (32*1024<> 4) 10 | 11 | #define longlen 5400 12 | #define hugelen 0x060000 13 | #define breaklz 512 14 | #define lzmagic 0x002FFe00*2 15 | #define lzshift(top) ((9*top)>>3) 16 | 17 | #define lzlow(total) ((total <= 400000) ? 60 :50) 18 | 19 | -------------------------------------------------------------------------------- /pack.c: -------------------------------------------------------------------------------- 1 | // test file compression based on lzoma algorith 2 | // (c) 2015,2016 Alexandr Efimov 3 | // License: GPL 2.0 or later 4 | // Uses divsufsort library for faster initialization (thanks to xezz for suggestion), see divsufsort.h for its license details. 5 | // 6 | // Discussion thread: http://encode.ru/threads/2280-LZOMA 7 | // 8 | // Notes: 9 | // 10 | // Pros: 11 | // Compression ratio is very good (much higher than lzo, ucl, gzip). 
12 | // Decompression speed is very high (faster than gzip, much faster than bzip2,lzham, lzma,xz) 13 | // tiny decompressor code (asm version of decompress function less than 400 bytes) 14 | // 15 | // compressed data format is somewhere between lzo and lzma 16 | // uses static encoding and byte-aligned literals and byte-aligned parts of match offset for decompression speed 17 | // 18 | // Cons: 19 | // compressor is VERY slow. It is possible to implement a faster compressor at the cost of some compression ratio. 20 | // maybe it is possible to adapt lzma compressor code. 21 | // 22 | // Other: 23 | // Code of both compression/decompression utils is experimental. 24 | // compressed data format is not stable yet. 25 | // compressor source code is more like a ground for experiments, not a finished product yet. 26 | // some commented out code was intended for experiments with Reduced-offset LZ, RLE step before LZ, LZX-style encoding of matches, various heuristics, etc. 27 | // 28 | #include 29 | #include 30 | #include"divsufsort.h" 31 | 32 | #include "lzoma.h" 33 | #include "bpe.h" 34 | #include "e8.h" 35 | 36 | #define MINOLEN 1 37 | #define MINLZ 2 38 | 39 | int level, short_match_level, match_level; 40 | int levels[9][3]= { 41 | {1,1,2}, 42 | {1,2,3}, 43 | {2,3,5}, 44 | {3,5,15}, 45 | {3,7,30}, 46 | {3,10,100}, 47 | {3,20,200}, // default level -7 48 | {3,40,500}, 49 | {3,100,1000} 50 | }; 51 | int verbose = 0; 52 | 53 | int dict_size, history_size, block_size; 54 | 55 | FILE *flzlit=NULL; 56 | FILE *flit=NULL; 57 | FILE *folz=NULL; 58 | FILE *flen=NULL; 59 | FILE *fdist=NULL; 60 | #ifdef EXPERIMENTS 61 | // this is for some experimentation only.
62 | FILE *test=NULL; 63 | FILE *test2=NULL; 64 | FILE *test3=NULL; 65 | #endif 66 | 67 | uint32_t *rle; 68 | uint8_t *in_buf; /* text to be encoded */ 69 | //uint8_t *out_buf; - shared with rle 70 | #define out_buf ((uint8_t *)rle) 71 | 72 | typedef struct { 73 | int32_t cache; /* best possible result in bits if we start with lz or letter code */ 74 | int32_t best_ofs; /* best way to start, assuming we do not start with OLD OFFSET code */ 75 | int32_t best_len; /* best way to start - match len, assuming we do not start with OLD OFFSET code */ 76 | int32_t use_olz; /* if not zero, repeat same offset after this number of literals */ 77 | int32_t olz_len; /* length of repeated lz */ 78 | int32_t use_olz2; /* if not zero, repeat same offset after this number of literals after first repeat */ 79 | int32_t olz_len2; /* length of repeated lz */ 80 | } FutureState; 81 | 82 | typedef struct { 83 | int32_t same; /* pointer to previous match of at least 2 bytes. for checking nearby short matches */ 84 | int32_t samelen; /* length of match between this and previous string pointed by "same" */ 85 | 86 | // sorted tree in order to quickly check long matches starting from the longest match 87 | int32_t sorted_len; 88 | int32_t sorted_prev; 89 | int32_t sorted_next; 90 | } PastState; 91 | 92 | // PastState and FutureState share the same memory buffer 93 | // sizeof(PastState) should be < sizeof(FutureState) 94 | void *state; 95 | void *past_state; 96 | 97 | #define sorted ((int32_t *)((uint8_t *)state)) // used very early in initialization 98 | 99 | #define cache(i) ((FutureState *)state)[i-in_offset].cache 100 | #define best_ofs(i) ((FutureState *)state)[i-in_offset].best_ofs 101 | #define best_len(i) ((FutureState *)state)[i-in_offset].best_len 102 | #define use_olz(i) ((FutureState *)state)[i-in_offset].use_olz 103 | #define olz_len(i) ((FutureState *)state)[i-in_offset].olz_len 104 | #define use_olz2(i) ((FutureState *)state)[i-in_offset].use_olz2 105 | #define olz_len2(i) 
((FutureState *)state)[i-in_offset].olz_len2 106 | 107 | #define same(i) (((PastState *)past_state)[i].same) 108 | #define samelen(i) (((PastState *)past_state)[i].samelen) 109 | #define sorted_len(i) (((PastState *)past_state)[i].sorted_len) 110 | #define sorted_prev(i) (((PastState *)past_state)[i].sorted_prev) 111 | #define sorted_next(i) (((PastState *)past_state)[i].sorted_next) 112 | 113 | #ifdef _MSC_VER 114 | #include 115 | uint32_t __inline __builtin_clz( uint32_t value ) 116 | { 117 | uint32_t leading_zero = 0; 118 | 119 | if ( _BitScanReverse( &leading_zero, value ) ) 120 | { 121 | return 31 - leading_zero; 122 | } 123 | else 124 | { 125 | return 32; 126 | } 127 | } 128 | #endif 129 | 130 | int in_offset = 0; 131 | 132 | static inline int price_offset(int num,int total) { 133 | if (total<=256) return 8;//top=0; 134 | register int res=8; 135 | register int x=256; 136 | 137 | int top = lzlow(total); 138 | while (1) { 139 | x+=x; 140 | if (x>=total+top) break; /* only 1 bit to be outputted left */ 141 | if (x & lzmagic) 142 | top=lzshift(top); 143 | //if (x>=breaklz) { 144 | if (num=x-total) { res++;} 152 | return res; 153 | } 154 | 155 | static inline int price_replen(int num) {//num>=2 156 | if (num<4) return 2;// 00 01 157 | num-=2; 158 | #define REPLEN_SKEW 1 159 | return REPLEN_SKEW+((31-__builtin_clz(num))<<1); 160 | } 161 | 162 | static inline int price_len(int num) {//num>=2 163 | if (num<4) return 2;// 00 01 10 164 | num-=2; 165 | #define LEN_SKEW 1 166 | return LEN_SKEW+((31-__builtin_clz(num))<<1); 167 | } 168 | 169 | int lastpos; 170 | unsigned int bit_cnt; 171 | int outpos; 172 | 173 | static inline void putbit(int bit) { 174 | bit_cnt>>=1; 175 | if (bit_cnt==0) { 176 | lastpos=outpos; 177 | *(unsigned long*)(out_buf+lastpos)=0; 178 | outpos+=4; 179 | bit_cnt=0x80000000; 180 | } 181 | if (bit) *(unsigned long *)(out_buf+lastpos)|=bit_cnt; 182 | } 183 | 184 | int stlet=0; 185 | int stlz=0; 186 | int stolz=0; 187 | int bitslzlen=0; 188 | int 
bitsolzlen=0; 189 | int bitslen=0; 190 | int bitsdist=0; 191 | int bitslit=0; 192 | 193 | static inline void putenc(int num,int total, int break_at, int debug) { 194 | char bits[100]; 195 | int res=0; 196 | int x=1; 197 | int obyte=0; 198 | if (fdist) fwrite(&num,1,4,fdist); 199 | obyte=1; 200 | bits[0]=0; 201 | bits[1]=0; 202 | bits[2]=0; 203 | bits[3]=0; 204 | bits[4]=0; 205 | bits[5]=0; 206 | bits[6]=0; 207 | bits[7]=0; 208 | //if (debug) fprintf(stderr,"ofs=%d total=%d\n",num,total); 209 | 210 | int top=lzlow(total); 211 | //if (total<=256) top=0; 212 | while (1) { 213 | x+=x; 214 | if (x>=512&& x>=total+top) break; /* only 1 bit to be outputted left */ 215 | if (x & lzmagic) 216 | top=lzshift(top); 217 | if (x>=break_at) { 218 | if (num=x) { 227 | num+=x; 228 | bits[res++]=2; 229 | } 230 | 231 | doneit: 232 | for(;res<8;res++) { 233 | bits[res++]=2; 234 | } 235 | for(x=res-1;x>=0;x--) { 236 | if (bits[x]==2) { 237 | bits[x]=num&1; 238 | num>>=1; 239 | } 240 | } 241 | if (obyte) { 242 | //printf("res=%d\n", res); 243 | uint8_t b=0; 244 | for(x=0;x<8;x++) { 245 | if (debug) printf("%d",bits[x]); 246 | if (bits[x]) b|=128>>x; 247 | } 248 | if (debug) printf(" "); 249 | if (!debug) out_buf[outpos++]=b; 250 | for(;x>1)) {bits[res++]=0; break;} 279 | bits[res++]=1; 280 | num-=x>>1; 281 | bits[res++]=2; 282 | } 283 | 284 | for(x=res-1;x>=0;x--) { 285 | if (bits[x]==2) { 286 | bits[x]=num&1; 287 | num>>=1; 288 | } 289 | } 290 | for(x=0;xb? a:b; 315 | } 316 | 317 | static inline void put_lz(int offset,int length,int used) { 318 | #ifdef EXPERIMENTS 319 | uint16_t code512 = 0x100; 320 | #endif 321 | 322 | if (flzlit) fprintf(flzlit,"%c",1); 323 | putbit(1); bitslzlen++; 324 | offset=-offset; /* 1.. */ 325 | offset--; /* 0.. 
*/ 326 | if (was_letter) { bitsolzlen++; 327 | was_letter=0; 328 | if (old_ofs==offset) { 329 | stolz++; 330 | if (folz) fprintf(folz,"%c",0); 331 | 332 | #ifdef EXPERIMENTS 333 | // test combining everything into one model for simple entropy coding 334 | code512 |= 0x80; 335 | if (length-MINOLEN < 0x7F) { 336 | code512 |= length-MINOLEN; 337 | } else { 338 | code512 |= 0x3F; 339 | length-=MINOLEN+0x7F; 340 | fwrite(&length, 4, 1, test2); 341 | length+=MINOLEN+0x7F; 342 | } 343 | code512 = (code512 & 0xFF) << 8 | (code512>>8); 344 | fwrite(&code512, 2, 1, test); 345 | #endif 346 | 347 | putbit(0); 348 | putenc_l(length-MINOLEN); 349 | return; 350 | } 351 | if (folz) fprintf(folz,"%c",1); 352 | putbit(1); 353 | } 354 | length-=MINLZ; 355 | stlz++; 356 | if (offset+1>=longlen) { length--; } 357 | if (offset+1>=hugelen) { length--; } 358 | 359 | #ifdef EXPERIMENTS 360 | if (length < 15) { 361 | code512 |= length; 362 | } else { 363 | code512 |= 15; 364 | length-=15; 365 | fwrite(&length, 4, 1, test2); 366 | length+=15; 367 | } 368 | code512 |= (offset & 0x7) << 4; 369 | code512 = (code512 & 0xFF) << 8 | (code512>>8); 370 | fwrite(&code512, 2, 1, test); 371 | uint tmpofs = offset >> 4; 372 | fwrite(&tmpofs, 4, 1, test3); 373 | #endif 374 | 375 | putenc(offset,used,breaklz, 0); 376 | putenc_l(length-MINLZ+2); 377 | 378 | old_ofs=offset; 379 | } 380 | 381 | static inline void put_letter(uint8_t b) { 382 | #ifdef EXPERIMENTS 383 | uint16_t code512 = b; 384 | code512 = (code512 & 0xFF) << 8 | (code512>>8); 385 | fwrite(&code512, 2, 1, test); 386 | #endif 387 | 388 | if (flzlit) fprintf(flzlit,"%c",0); 389 | if (flit) fprintf(flit,"%c",b); 390 | putbit(0); bitslzlen++; 391 | out_buf[outpos++]=b; bitslit+=8; 392 | was_letter++; 393 | stlet++; 394 | } 395 | 396 | static inline int price_lz(int offset, int length, int used) { // offset>=1, length>=2, 397 | // if offset=>0xD00 length>=3 398 | int res=1; /* 1 bit = not a letter */ 399 | if (offset>=longlen) { length--; } 400 | 
if (offset>=hugelen) { length--; } 401 | 402 | offset--; // 0.. 403 | 404 | res+=price_offset(offset,used); 405 | res+=price_len(length-MINLZ+2); 406 | return res; 407 | } 408 | 409 | static inline int price_lzlen(int offset, int length, int used) { // offset>=1, length>=2, 410 | // if offset=>0xD00 length>=3 411 | int res=1; /* 1 bit = not a letter */ 412 | if (offset>=longlen) { length--; } 413 | if (offset>=hugelen) { length--; } 414 | 415 | res+=price_len(length-MINLZ+2); 416 | return res; 417 | } 418 | 419 | static inline int price_replz_minus_lz(int offset, int length, int used) { // offset>=1, length>=2, 420 | // if offset=>0xD00 length>=3 421 | int res=2 /* lzlit flag, replz flag */ +price_replen(length+2-MINOLEN); 422 | return res-price_lz(offset,length,used); 423 | } 424 | 425 | static inline int cmpstr(int src,int src2) { 426 | int res=0; 427 | int b; 428 | 429 | for(;;) { 430 | if (in_buf[src]!=in_buf[src2]) return res; 431 | b=rle[src2]; 432 | if (!b) return res; 433 | if (b>rle[src]) {return res+rle[src];} 434 | res+=b; 435 | src+=b; 436 | src2+=b; 437 | } 438 | return res; 439 | } 440 | 441 | int cmpstrsort(int *psrc,int *psrc2) { 442 | int b; 443 | int src = *psrc; 444 | int src2 = *psrc2; 445 | // printf("%d:%d:%d\n",src,src2,left); 446 | do { 447 | if (in_buf[src]in_buf[src2]) return 1; 449 | b=rle[src2]; 450 | if (!b) return 1; // first string is longer 451 | if (b>rle[src]) b=rle[src]; 452 | if (!b) return -1; // second string is longer 453 | src+=b; 454 | src2+=b; 455 | } while(1); 456 | } 457 | 458 | void init_same(int start, int n) { 459 | int i; 460 | uint16_t bb; 461 | int run_len; 462 | int gen_same[256*256+256]; 463 | 464 | /* 465 | Notes: the slowest parts here are PLCP array construction and divsufsort. 466 | On slower levels -7 .. -9 it does not matter. 467 | But on fast levels -1..-3 (that still provide good compression), 468 | initialization takes about 20-30% processing time. 
469 | 470 | Also, it reprocesses whole history each time a new block is read, 471 | which is clearly not optimal. 472 | 473 | possible optimizations: 474 | 1. store SA for later reuse, do divsufsort for new block only, then 475 | merge them. not sure if it will be faster. still need to recalculate rlcp 476 | 2. get rid of SA completely, construct suffix tree directly. 477 | */ 478 | for(i=0;i<256+256*256;i++) gen_same[i] =0; // for bucketA & bucketB 479 | divsufsort(in_buf,sorted,gen_same,n); 480 | // reuse sorted_prev for temp buffer 481 | #define rank(i) rle[i] 482 | /* 483 | calculate plcp in O(n) time 484 | see http://www.cs.ucr.edu/~stelo/cpm/cpm09/04_karkk.pdf 485 | http://www.mi.fu-berlin.de/wiki/pub/ABI/Sequence_analysi_2013/2004_ManziniTwo_Space_Saving_Tricks_for_Linear_Time_LCP_Array_Computation.pdf 486 | */ 487 | for(i=1;i<=n-1;i++) rank(sorted[i]) = sorted[i-1]; 488 | rank(sorted[0]) = sorted[n-1]; 489 | 490 | sorted_prev(sorted[0])=-1; 491 | for(i=1;i0) h--; 506 | } 507 | 508 | rle[n] = run_len = 0; 509 | uint8_t b = in_buf[n-1]; 510 | for(i=n-1;i>=0;i--) { 511 | if (in_buf[i]==b) 512 | run_len++; 513 | else { 514 | b=in_buf[i]; 515 | run_len = 1; 516 | } 517 | rle[i]=run_len; 518 | } 519 | 520 | bb=0; 521 | 522 | for(i=0;i<65536;i++) {gen_same[i]=-1; } 523 | for(i=0;i=0) { samelen(i)=1+cmpstr(i+1,same(i)+1);} 527 | gen_same[bb]=i; 528 | } 529 | same(i)=-1; 530 | 531 | in_buf[n]=0; 532 | 533 | if (verbose) printf("init done.\n"); 534 | } 535 | 536 | #define CHECK_REPLZ \ 537 | int k;\ 538 | int jjj;\ 539 | int d=level;\ 540 | int tmp=pofs+price_lzlen(used-pos,len,used);\ 541 | int olen=0;\ 542 | for(k=len+1;k=MINOLEN) {\ 572 | int tmp2=tmp+2+price_replen(olen+2-MINOLEN);\ 573 | tmp2+=cache(used+k+olen);\ 574 | if (best_len(used+k+olen)==1) {\ 575 | int jj;\ 576 | for(jj=1;jj<=8;jj++) {\ 577 | if (best_len(used+k+olen+jj)>1) {\ 578 | if (best_ofs(used+k+olen+jj)==pos-used) {\ 579 | 
tmp2+=price_replz_minus_lz(used-pos,best_len(used+k+olen+jj),used+k+olen+jj);\ 580 | break;\ 581 | }\ 582 | }\ 583 | int olen2=cmpstr(used+k+olen+jj,pos+k+olen+jj);\ 584 | for (jjj=MINOLEN;jjj<=olen2;jjj++) {\ 585 | /* if (olen2>=MINOLEN) {*/\ 586 | int tmp3=-cache(used+k+olen);\ 587 | tmp3+=jj*9+2+price_replen(jjj+2-MINOLEN);\ 588 | tmp3+=cache(used+k+olen+jj+jjj);\ 589 | if (tmp3<0) { tmp3+=tmp2;\ 590 | if (tmp3=0) { 634 | sorted_next(sorted_prev(n-1))=sorted_next(n-1); 635 | } 636 | if (sorted_next(n-1)>=0) { 637 | sorted_len(sorted_next(n-1)) = Min(sorted_len(sorted_next(n-1)), 638 | sorted_len(n-1)); 639 | sorted_prev(sorted_next(n-1))=sorted_prev(n-1); 640 | } 641 | 642 | for(i=n-2;i>=start;i--) { 643 | int used=i; 644 | int left=n-i; 645 | int res; 646 | int pos; 647 | int max_match; 648 | int len; 649 | int j; 650 | 651 | int my_best_ofs=0; 652 | int my_best_len=1; 653 | int my_use_olz=0; 654 | int my_use_olz2=0; 655 | int my_olz_len=0; 656 | int my_olz_len2=0; 657 | int match_check_max; 658 | int notskip = 1; 659 | 660 | res=9+cache(used+1); 661 | if (best_ofs(used+1)) { 662 | res++; 663 | if (in_buf[used]==in_buf[used+1]) { 664 | if (in_buf[used]==in_buf[used-1]) { 665 | if (in_buf[used]==in_buf[used+best_ofs(used+1)]) { 666 | if ((best_len(used+1)>3)||(best_len(used+1)==3&&-best_ofs(used+1)=5) 679 | notskip = 0; 680 | } 681 | } 682 | } 683 | } 684 | } 685 | 686 | int k; 687 | for(k=1;k<4;k++) 688 | if (n-i>2+k && best_ofs(used+2+k) 689 | && used+best_ofs(used+2+k) >= 0 690 | && -best_ofs(used+2+k) < longlen && best_ofs(used+2+k)!=best_ofs(used+1+k)) { 691 | if (in_buf[used]==in_buf[used+best_ofs(used+2+k)]) { 692 | if (in_buf[used+1]==in_buf[used+1+best_ofs(used+2+k)]) { 693 | int tmp=cache(used+2+k)+price_replz_minus_lz(-best_ofs(used+2+k),best_len(used+2+k),used+2+k) 694 | +9*k+price_lz(-best_ofs(used+2+k),2,used); 695 | if (tmp<=res) { 696 | res=tmp; 697 | my_best_ofs=best_ofs(used+2+k); 698 | my_best_len=2; 699 | my_use_olz=k; 700 | 
my_olz_len=best_len(used+2+k); 701 | my_use_olz2=use_olz(used+2+k); 702 | my_olz_len2=olz_len(used+2+k); 703 | } 704 | } 705 | } 706 | } 707 | 708 | for(k=1;k<4;k++) 709 | if (n-i>3+k && best_ofs(used+3+k) 710 | && used+best_ofs(used+3+k) >= 0 711 | && -best_ofs(used+3+k) < hugelen && best_ofs(used+3+k)!=best_ofs(used+2+k)) { 712 | if (in_buf[used]==in_buf[used+best_ofs(used+3+k)]) { 713 | if (in_buf[used+1]==in_buf[used+1+best_ofs(used+3+k)]) { 714 | if (in_buf[used+2]==in_buf[used+2+best_ofs(used+3+k)]) { 715 | int tmp=cache(used+3+k)+price_replz_minus_lz(-best_ofs(used+3+k),best_len(used+3+k),used+3+k) 716 | +9*k+price_lz(-best_ofs(used+3+k),3,used); 717 | if (tmp<=res) { 718 | res=tmp; 719 | my_best_ofs=best_ofs(used+3+k); 720 | my_best_len=3; 721 | my_use_olz=k; 722 | my_olz_len=best_len(used+3+k); 723 | my_use_olz2=use_olz(used+3+k); 724 | my_olz_len2=olz_len(used+3+k); 725 | } 726 | } 727 | } 728 | } 729 | } 730 | 731 | for(k=1;k<4;k++) 732 | if (n-i>4+k && best_ofs(used+4+k) 733 | && used+best_ofs(used+4+k) >= 0 734 | && best_ofs(used+4+k)!=best_ofs(used+3+k)) { 735 | if (in_buf[used]==in_buf[used+best_ofs(used+4+k)]) { 736 | if (in_buf[used+1]==in_buf[used+1+best_ofs(used+4+k)]) { 737 | if (in_buf[used+2]==in_buf[used+2+best_ofs(used+4+k)]) { 738 | if (in_buf[used+3]==in_buf[used+3+best_ofs(used+4+k)]) { 739 | int tmp=cache(used+4+k)+price_replz_minus_lz(-best_ofs(used+4+k),best_len(used+4+k),used+4+k) 740 | +9*k+price_lz(-best_ofs(used+4+k),4,used); 741 | if (tmp<=res) { 742 | res=tmp; 743 | my_best_ofs=best_ofs(used+4+k); 744 | my_best_len=4; 745 | my_use_olz=k; 746 | my_olz_len=best_len(used+4+k); 747 | my_use_olz2=use_olz(used+4+k); 748 | my_olz_len2=olz_len(used+4+k); 749 | } 750 | } 751 | } 752 | } 753 | } 754 | } 755 | pos=same(used); 756 | if (pos<0) goto done; 757 | if (!notskip) goto done; 758 | 759 | { 760 | len=samelen(used); 761 | int ll=(used-pos>=longlen)?1:0; 762 | if (used-pos>=hugelen) ll=2; 763 | int pofs = price_offset(used-pos-1,used); 
764 | if (len=2+ll) { 765 | CHECK_REPLZ 766 | } 767 | for(j=MINLZ+ll;j<=len;j++) { 768 | int tmp=pofs+price_lzlen(used-pos,2-MINLZ+j,used); 769 | tmp+=cache(used+j); 770 | if (tmp=longlen)?1:0; 787 | if (used-pos>=hugelen) ll=2; 788 | //if (used-pos>=longlen) break; 789 | if (pos<0) break; 790 | if (len>slen) { 791 | len=slen; 792 | } else if (len==slen) { 793 | len+=cmpstr(used+len,pos+len); 794 | } 795 | int pofs = price_offset(used-pos-1,used); 796 | if (len=2+ll) { 797 | CHECK_REPLZ 798 | } 799 | if (len>max_match) { 800 | for(j=Max(max_match+1,MINLZ+ll);j<=len;j++) { 801 | int tmp=pofs+price_lzlen(used-pos,j-MINLZ+2,used); 802 | tmp+=cache(used+j); 803 | if (tmp= 0 ? sorted_len(used) : 0; 819 | int len_bottom=bottom >= 0 ? sorted_len(bottom) : 0; 820 | 821 | match_check_max = match_level; 822 | int my_min_ofs=used+1; 823 | while (top>=0 || bottom >=0) { 824 | match_check_max--; 825 | if (match_check_max<=0) goto done; 826 | if (len_top>len_bottom) { 827 | pos=top; 828 | len=len_top; 829 | len_top = Min(len_top,top >= 0 ? sorted_len(top):0); 830 | top=sorted_prev(pos); 831 | } else { 832 | pos=bottom; 833 | len=len_bottom; 834 | bottom=sorted_next(pos); 835 | len_bottom = Min(len_bottom,bottom >= 0 ? sorted_len(bottom):0); 836 | } 837 | if (len<=MINLZ) goto done; 838 | if (len<=MINLZ+1 && used-pos>=hugelen) continue; // 839 | int pofs = price_offset(used-pos-1,used); 840 | if (lenused-pos) { 844 | my_min_ofs=used-pos;//we are checking matches in decreasing order. 
we need to check next matches only if those are shorter 845 | int ll=(used-pos>=hugelen)?1:0; 846 | for(j=MINLZ+1+ll;j<=len;j++) { 847 | int tmp=pofs+price_lzlen(used-pos,j-MINLZ+2,used); 848 | tmp+=cache(used+j); 849 | if (tmp=0) { 863 | sorted_next(sorted_prev(used))=sorted_next(used); 864 | } 865 | if (sorted_next(used)>=0) { 866 | sorted_len(sorted_next(used)) = Min(sorted_len(sorted_next(used)), 867 | sorted_len(used)); 868 | sorted_prev(sorted_next(used))=sorted_prev(used); 869 | } 870 | 871 | best_ofs(used)=my_best_ofs; 872 | best_len(used)=my_best_len; 873 | use_olz(used)=my_use_olz; 874 | olz_len(used)=my_olz_len; 875 | use_olz2(used)=my_use_olz2; 876 | olz_len2(used)=my_olz_len2; 877 | cache(used)=res; 878 | 879 | if (verbose && (i&0xFFF)==0) { 880 | printf("\x0D%d left ",i-start); 881 | fflush(stdout); 882 | } 883 | } 884 | 885 | res=8+cache(start); 886 | if (verbose) printf("\nres=%d\n",res); 887 | res+=7; 888 | res>>=3; 889 | if (verbose) printf("res bytes=%d\n",res); 890 | if (res>=n-start) { 891 | return n; 892 | }; 893 | 894 | /* now we can easily generate compressed stream */ 895 | initout(start); 896 | for(i=start;i0) { 911 | for(;k>0;k--) put_letter(in_buf[i++]); 912 | if ((use_olz(i))&&(len==best_len(i))&&(ofs==best_ofs(i))) goto dolz; 913 | // printf("put_lz %d:%d,left=%d\n",ofs,len,n-i); 914 | put_lz(ofs,len,i); 915 | i+=len; 916 | if (k2>0) { 917 | for(;k2>0;k2--) put_letter(in_buf[i++]); 918 | if ((use_olz(i))&&(len2==best_len(i))&&(ofs==best_ofs(i))) goto dolz; 919 | // printf("put_lz %d:%d,left=%d\n",ofs,len,n-i); 920 | put_lz(ofs,len2,i); 921 | i+=len2; 922 | 923 | } 924 | 925 | } 926 | } 927 | } 928 | if (verbose) printf("out bytes=%d\n",outpos); 929 | return outpos; 930 | } 931 | 932 | int main(int argc,char *argv[]) { 933 | FILE *ifd,*ofd; 934 | int n,i,bres,blz; 935 | uint8_t b; 936 | 937 | if (argc<3) { 938 | // note: -d0 (32k history) does not work right now 939 | printf("usage: lzoma [OPTION] input output [lzlit lit olz len 
dist]\n" 940 | "\t-1 .. -9 Compression level (default 7)\n" 941 | "\t-d[1..15] History size (default 9: 16M history; compression currently requires about 30x*history RAM)\n" 942 | "\t-v Be verbose\n" 943 | ); 944 | printf("Notice: this program is at experimental stage of development. Compression format is not stable yet.\n"); 945 | if (argc>1 && argv[1][0]=='%') { // undocumented debug feature to check correctness of offset encoding, when tuning parameters in lzoma.h 946 | int i; 947 | int total=atoi(argv[1]+1);//16*1024*1024; 948 | printf("%d\n",total); 949 | for(i=total-10;i='1' && argv[arg][1]<='9') 962 | metalevel = argv[arg][1]-'0'; 963 | if (argv[arg][1]=='v') 964 | verbose = 1; 965 | if (argv[arg][1]=='d') { 966 | dict_size = atoi(argv[arg]+2); 967 | if (dict_size <1) dict_size=1; 968 | if (dict_size >15) dict_size=15; 969 | } 970 | arg++; 971 | } 972 | history_size=HISTORY_SIZE(dict_size); 973 | block_size=BLOCK_SIZE(dict_size); 974 | metalevel--; 975 | level=levels[metalevel][0]; 976 | short_match_level=levels[metalevel][1]; 977 | match_level=levels[metalevel][2]; 978 | in_buf = (void *)malloc(history_size * sizeof(uint8_t)+1); 979 | rle = (void *)malloc(history_size * sizeof(uint32_t)); 980 | state = (void *)malloc(Max(block_size * sizeof(FutureState), history_size * sizeof(uint32_t))); 981 | past_state = (void *)malloc(history_size * sizeof(PastState)); 982 | char *inf=argv[arg++]; 983 | char *ouf=argv[arg++]; 984 | ifd=fopen(inf,"rb"); 985 | ofd=fopen(ouf,"wb"); 986 | if (arghistory_size-block_size) { 1002 | memmove(in_buf, in_buf+block_size, history_size-block_size); 1003 | in_offset -= block_size; 1004 | } 1005 | n=fread(in_buf+in_offset,1,block_size,ifd); 1006 | if (n<=0) { 1007 | blk = BLOCK_STORED | BLOCK_LAST; 1008 | fwrite(&blk,4,1,ofd); 1009 | break; 1010 | } 1011 | if (verbose) printf("got %d bytes, packing...\n",n); 1012 | if (blocknum==0) { 1013 | /* 1014 | int b1=cnt_bpes(in_buf,n); 1015 | int use_e8=1; 1016 | e8(in_buf, n); 1017 | int 
b2=cnt_bpes(in_buf,n); 1018 | printf("stats noe8 %d e8 %d\n",b1,b2); 1019 | if (b2<=b1) { 1020 | use_e8=0; 1021 | printf("reverted e8\n"); 1022 | 1023 | e8back(in_buf,n); 1024 | } 1025 | */ 1026 | /* 1027 | write compressed file header 1028 | we do it here only after we read some data 1029 | TODO: 1030 | at this stage we should decide if we will use any file-level compression filters 1031 | */ 1032 | uint8_t header[8]; 1033 | header[0] = AuthorID >> 8; 1034 | header[1] = AuthorID & 0xFF; 1035 | header[2] = AlgoID[0]; 1036 | header[3] = AlgoID[1]; 1037 | header[4] = AlgoID[2]; 1038 | header[5] = AlgoID[3]; 1039 | header[6] = Version; 1040 | int flags=0; 1041 | header[7] = flags << 4 | dict_size; 1042 | fwrite(header,8,1,ofd); 1043 | 1044 | bres=pack(1,n); 1045 | } else { // next blocks 1046 | bres=pack(in_offset,in_offset+n); 1047 | } 1048 | uint32_t blk = (n < block_size) ? BLOCK_LAST : 0; 1049 | if (bres==n) { 1050 | blk |= BLOCK_STORED; 1051 | blk |= n; 1052 | fwrite(&blk,4,1,ofd); 1053 | fwrite(in_buf+in_offset,1,n,ofd); 1054 | } else { 1055 | blk |= bres; 1056 | fwrite(&blk,4,1,ofd); 1057 | if (blk & BLOCK_LAST) 1058 | fwrite(&n,4,1,ofd); 1059 | fwrite(out_buf,1,bres,ofd); 1060 | if (blk & BLOCK_LAST) 1061 | break; 1062 | } 1063 | 1064 | in_offset += n; 1065 | } 1066 | if (verbose) printf("closing files let=%d lz=%d olz=%d\n",stlet,stlz,stolz); 1067 | if (verbose) printf("bits lzlit=%d let=%d olz=%d match=%d len=%d\n",bitslzlen,bitslit,bitsolzlen,bitsdist,bitslen); 1068 | fclose(ifd); 1069 | fclose(ofd); 1070 | 1071 | #ifdef EXPERIMENTS 1072 | fclose(test); 1073 | fclose(test2); 1074 | fclose(test3); 1075 | #endif 1076 | 1077 | return 0; 1078 | } 1079 | -------------------------------------------------------------------------------- /readme.MSVC: -------------------------------------------------------------------------------- 1 | To compile with MSVC: 2 | 3 | Open Developer Command Prompt and type: 4 | cl pack.c divsufsort.c 5 | cl unpack.c 6 | 7 | Note: MSVC 
support is currently untested and probably has bugs. Prefer gcc or mingw if possible. 8 | 9 | 10 | -------------------------------------------------------------------------------- /readme.txt: -------------------------------------------------------------------------------- 1 | Experimental packer based on a new compression algorithm (LZOMA). 2 | (C)2015-2016 Alexandr Efimov 3 | 4 | This code can be redistributed under the GPL Version 2 License. 5 | For commercial licenses or support please contact the author. 6 | 7 | Project goals: 8 | extremely fast in-place decompression (similar to LZO) 9 | but with high compression ratio (much better than LZO, GZIP, BZIP2) 10 | 11 | Current results: 12 | 13 | Compression ratio is much higher than gzip, and much, much higher than LZO. 14 | Decompression speed is similar to UCL (a bit slower than LZO, faster than 15 | gzip, bzip2 etc). 16 | Decompressor code length is less than 300 bytes. 17 | Has a special filter for x86 code. 18 | Decompression can be done in-place and does not require additional memory. 19 | 20 | Overall, the results are very good for "compress once, unpack often" tasks like 21 | linux kernel & ramdisk, readonly compressed filesystems. 22 | 23 | Comparison with other compression software: 24 | Nearest competitors are zstd, brotli. 25 | Other compressors/archivers either decompress much slower or have a much worse compression ratio. 26 | 27 | Compression ratio on binary files (without effect of e8e8 filter), 28 | from best to worst: 29 | brotli, lzoma, zstd 30 | 31 | Compression ratio on text files: 32 | brotli, zstd, lzoma 33 | 34 | Decompressor code size: 35 | lzoma, zstd, brotli 36 | 37 | Decompression speed, on x86-64: 38 | zstd is about 2x faster, lzoma and brotli have similar speed. 39 | 40 | Decompression speed, on Intel Atom tablet: 41 | zstd and lzoma have similar speed, brotli is 4x slower. 42 | 43 | Algorithm description. 44 | 45 | Compressed format has some features similar to both LZO and LZMA.
46 | Does not use range coding. 47 | A special bit is added to matches that follow literals, indicating to re-use the 48 | previous offset instead of always storing the offset for each match. 49 | This allows patterns like abcdEabcdFabcdGabc to be compressed more efficiently, as 50 | the offset will be stored only for the first match. 51 | 52 | This idea allows much higher compression than classical LZ algorithms but 53 | the compressor is much more complicated. 54 | 55 | Compressed data format: 56 | literal, item, ... item 57 | 58 | Where: 59 | literal is an uncompressed byte aligned at a byte boundary 60 | item is: 61 | 1 bit flag (literal | match) 62 | if flag is literal then literal follows 63 | 64 | if flag is match then 65 | if previous item was literal 66 | 1 bit flag==0: use previous offset for match 67 | if previous offset is not used for match 68 | offset (encoded) 69 | len (encoded) 70 | 71 | Notes: 72 | 73 | Algorithm is still experimental, compressed format is not final yet. 74 | 75 | File format (WIP, not implemented yet): 76 | 1. Header 77 | uint8_t[2] AuthorID 0xA1, 0xEF // this goes before AlgoID to avoid possible signature conflict with other LZ compressors 78 | uint8_t[4] AlgoID 'L','Z','O','M' 79 | uint8_t Version 0x00 80 | uint8_t HistorySize (low 4 bits) | Flags (high 4 bits) 81 | where HistorySize is 82 | 0: 32k 83 | 1: 64k 84 | 2: 128k 85 | 3: 256k 86 | 4: 512k 87 | 5: 1M 88 | 6: 2M 89 | 7: 4M 90 | 8: 8M 91 | 9: 16M 92 | 10:32M 93 | 11:64M 94 | 12:128M 95 | 13:256M 96 | 14:512M 97 | 15: 1G 98 | BlockSize = HistorySize / 16 99 | 100 | Flags: 101 | 0x10 - use filters, 1 byte filter type follows 102 | 0x00 - x86 103 | 0x01 - x86-64 104 | 0x02 - arm 105 | 0x03 - mips 106 | 0x04 - 0xF - reserved 107 | 0x10 - use delta filter 108 | 0x20 - text/xml filter 109 | 0x40 - reserved 110 | 0x80 - reserved 111 | 0x20 - encrypted file 112 | TODO: some compression header follows 113 | 0x40 - digitally signed file (signature follows at the end of file) 114 | 0x80 - reserved 115 | 116 | 2. 
Blocks 117 | Blocks header is 4 bytes or more: 118 | high bits masks: 119 | 0x80000000 - if set, it is a stored block 120 | 0x40000000 - last block, 4 byte unpacked length follows unless it is a stored block 121 | if not set, unpacked length assumed to be BLOCK_SIZE 122 | 0x20000000 - reserved 123 | 0x10000000 - reserved 124 | low 28 bits = packed length up to 2^28, can be zero 125 | 126 | 3. uint32_t CRC 127 | 128 | -------------------------------------------------------------------------------- /unpack.c: -------------------------------------------------------------------------------- 1 | // test file decompression using LZOMA algoritm 2 | // (c) Alexandr Efimov, 2015-2016 3 | // License: GPL v2 or later 4 | 5 | #include 6 | #include 7 | #include 8 | #ifndef _MSC_VER 9 | #include 10 | #endif 11 | #include 12 | #include 13 | //#include 14 | 15 | #ifndef O_BINARY 16 | #ifdef _O_BINARY 17 | #define O_BINARY _O_BINARY 18 | #else 19 | #define O_BINARY 0 20 | #endif 21 | #endif 22 | 23 | #include "lzoma.h" 24 | 25 | uint8_t *in_buf; /* text to be decoded */ 26 | uint8_t *out_buf;/* decoded text + history */ 27 | 28 | //#define getbit (((bits=bits&0x7f? bits+bits : (((unsigned)(*src++))<<1)+1)>>8)&1) 29 | #define getbit ((bits=bits&0x7fffffff? 
(resbits=bits,bits+bits) : (src+=4,resbits=*((uint32_t *)(src-4)),(resbits<<1)+1)),resbits>>31) 30 | 31 | #define getcode(bits, src, ptotal) {\ 32 | int total = (ptotal);\ 33 | ofs=0;\ 34 | long int res=0;\ 35 | int x=256;\ 36 | int top=0;\ 37 | top=lzlow(total);\ 38 | res=*src++;\ 39 | \ 40 | while (1) {\ 41 | x+=x;\ 42 | if (x>=total+top) break;\ 43 | if (x & lzmagic)\ 44 | top=lzshift(top);\ 45 | if (res=x) { \ 53 | res+=res+getbit;\ 54 | res-=x;\ 55 | }\ 56 | getcode_doneit: \ 57 | ofs+=res;\ 58 | } 59 | 60 | #define getlen(bits, src) {\ 61 | long int res=0;\ 62 | \ 63 | if (getbit==0) {\ 64 | len+=getbit;\ 65 | goto getlen_0bit;\ 66 | }\ 67 | len+=2;\ 68 | while (1) { \ 69 | res+=res+getbit;\ 70 | if (getbit==0) break;\ 71 | res++;\ 72 | }\ 73 | len+=res;\ 74 | getlen_0bit: ;\ 75 | } 76 | 77 | static void unpack_c(int current_history_size, int history_size, uint8_t *src, uint8_t *dst, uint8_t *start, int left) { 78 | int ofs=-1; 79 | int len; 80 | uint32_t bits=0x80000000; 81 | uint32_t resbits; 82 | left--; 83 | history_size--;// becomes mask for circular buffer indexing 84 | if (current_history_size) { 85 | current_history_size-=dst-start; 86 | goto nextblock; 87 | } 88 | 89 | copyletter: 90 | *dst++=*src++; 91 | left--; 92 | nextblock: 93 | len=-1; 94 | 95 | get_bit: 96 | if (left<0) return; 97 | if (getbit==0) goto copyletter; 98 | 99 | /* unpack lz */ 100 | if (len<0) { 101 | len=1; 102 | if (!getbit) { 103 | goto uselastofs; 104 | } 105 | } 106 | len=2; 107 | getcode(bits,src,dst-start+current_history_size); 108 | ofs++; 109 | if (ofs>=longlen) len++; 110 | if (ofs>=hugelen) len++; 111 | ofs=-ofs; 112 | uselastofs: 113 | getlen(bits,src); 114 | left-=len; 115 | 116 | int ptr = dst-start+ofs; 117 | do { 118 | *dst=start[ptr&(history_size)]; 119 | ptr++; 120 | dst++; 121 | } while(--len); 122 | goto get_bit; 123 | } 124 | 125 | #ifdef ASM_X86 126 | extern unsigned int unpack_x86(uint8_t *src, uint8_t *dst, int left); 127 | #endif 128 | 129 | #include 
"e8.h" 130 | int main(int argc,char * argv[]) { 131 | int ifd,ofd; 132 | int n,n_unp; 133 | char shift; 134 | 135 | if (argc<3) { 136 | printf("usage: unpack input output\n Unpacks file packed using lzoma algoritm\n"); 137 | printf("Notice: this program is at experimental stage of development. Compression format is not stable yet.\n"); 138 | exit(0); 139 | } 140 | 141 | ifd=open(argv[1],O_RDONLY|O_BINARY); 142 | ofd=open(argv[2],O_WRONLY|O_TRUNC|O_CREAT|O_BINARY,511); 143 | int current_history = 0; 144 | int ofs = 0; 145 | int use_e8=0; 146 | uint8_t header[8]; 147 | read(ifd,header,8); 148 | if (header[0] != (AuthorID >> 8) || 149 | header[1] != (AuthorID & 0xFF) || 150 | header[2] != AlgoID[0] || 151 | header[3] != AlgoID[1] || 152 | header[4] != AlgoID[2] || 153 | header[5] != AlgoID[3] || 154 | header[6] != Version) { 155 | fprintf(stderr, "Unsupported compressed data format\n"); 156 | return 1; 157 | } 158 | int dict_size = header[7] & 0xF; 159 | int history_size = HISTORY_SIZE(dict_size); 160 | int block_size = BLOCK_SIZE(dict_size); 161 | in_buf = (uint8_t *)malloc(block_size); 162 | out_buf = (uint8_t *)malloc(history_size); // history is 16*block_size 163 | 164 | uint32_t blk; 165 | while(read(ifd,&blk,4)==4) { 166 | //if (use_e8) e8(out_buf,n_unp); 167 | n = blk & (block_size-1); 168 | if (blk & BLOCK_STORED) { 169 | n_unp = n; 170 | } else if (blk & BLOCK_LAST) { 171 | read(ifd,&n_unp,4); 172 | } else { 173 | n_unp = block_size; 174 | } 175 | /* 176 | if (n != n_unp && !current_history) 177 | read(ifd,&use_e8,1); 178 | else 179 | use_e8 = 0; 180 | */ 181 | //long unsigned tsc = (long unsigned)__rdtsc(); 182 | if (n == n_unp) { 183 | read(ifd,out_buf,n_unp); 184 | write(ofd,out_buf+ofs,n_unp); 185 | } else { 186 | read(ifd,in_buf,n); 187 | #ifdef ASM_X86 188 | #error Asm version not yet updated for recent format changes. Please use C version right now. 
189 | unpack_x86(in_buf, out_buf, n_unp); 190 | #else 191 | unpack_c(current_history, history_size, in_buf, out_buf+ofs, out_buf, n_unp); 192 | #endif 193 | //tsc=(long unsigned)__rdtsc()-tsc; 194 | //printf("tsc=%lu\n",tsc); 195 | //if (use_e8) e8back(out_buf,n_unp); 196 | write(ofd,out_buf+ofs,n_unp); 197 | } 198 | if (blk & BLOCK_LAST) 199 | break; 200 | ofs+=n_unp; 201 | ofs &= (history_size-1); 202 | current_history += n_unp; 203 | if (current_history > history_size-block_size) 204 | current_history = history_size-block_size; 205 | } 206 | 207 | close(ifd); 208 | close(ofd); 209 | return 0; 210 | } 211 | -------------------------------------------------------------------------------- /unpack_lzoma.S: -------------------------------------------------------------------------------- 1 | .file "unpack_lzoma.S" 2 | .section .text.unlikely,"ax",@progbits 3 | .text 4 | .align 16 5 | .globl unpack_x86 6 | .type unpack_x86, @function 7 | unpack_x86: 8 | pushl %ebp 9 | pushl %edi 10 | pushl %esi 11 | pushl %ebx 12 | movl 28(%esp), %edx # uncompressed bytes num 13 | movl 20(%esp), %esi # input buffer 14 | decl %edx 15 | movl 24(%esp), %edi # edi = output buffer 16 | pushl %edi # save output buffer start in stack 17 | xorl %ebp, %ebp 18 | incl %ebp # ebp = offset = -1 19 | movl $0x80000000,%eax 20 | .copyletter: 21 | movsb 22 | #movb (%esi), %cl # cl = *src 23 | #inc %esi # src++ 24 | #movb %cl, (%edi) # *dst = cl 25 | #inc %edi # dst++ 26 | orl $-1, %ebx # ebx = len = -1 27 | decl %edx # left-- 28 | js .unpack_ret 29 | .checkleft: 30 | addl %eax,%eax 31 | jnz .nonextbit 32 | lodsl 33 | adcl %eax,%eax 34 | .nonextbit: 35 | .checkifletterorlz: 36 | jnc .copyletter 37 | .unpack_lz: 38 | incl %ebx 39 | push %edx # save left 40 | jnz .load_ofs 41 | addl %eax,%eax 42 | jnz .L9 43 | lodsl 44 | adcl %eax,%eax 45 | .L9: 46 | # ebx==0 at this point 47 | jnc .load_len 48 | .load_ofs: 49 | push %edi # save dst, we need edi as temp register 50 | xor %ebp,%ebp # ofs=0 51 | movzxb 
(%esi), %ecx # res=*src 52 | mov $512, %ebx # x=256 53 | subl 8(%esp), %edi # here (%esp) is pushed edi, 4(%esp) is left 54 | inc %ebp # ofs++ 55 | inc %esi 56 | #movl $48, %edx 57 | lea 47(%ebp),%edx 58 | cmp $652630,%edi 59 | ja .low 60 | mov $60,%dl 61 | cmp $49549,%edi 62 | ja .low 63 | mov $80,%dl 64 | .low: 65 | .loop_ofs: 66 | addl %edi,%edx # top+=total 67 | cmpl %edx,%ebx # cmp total+top,x 68 | jns .ofs_last_bit 69 | subl %edi,%edx # top-=total 70 | test $0x055ffc00,%ebx 71 | jz .noshift 72 | leal (%edx,%edx,8),%edx # top*=9 73 | shrl $3,%edx # top>>=3 74 | .noshift: 75 | cmp %edx,%ecx # cmp top,res 76 | jl .ofs_final_calc # if res 5 | #include 6 | 7 | uint BytesLoaded; 8 | 9 | uint flen( FILE* f ) 10 | { 11 | fseek( f, 0, SEEK_END ); 12 | uint len = ftell(f); 13 | fseek( f, 0, SEEK_SET ); 14 | return len; 15 | } 16 | 17 | void* fload( char* fname ) 18 | { 19 | FILE* temp = fopen(fname,"rb"); 20 | if (temp==0) return 0; 21 | unsigned int len = flen(temp); 22 | BytesLoaded = len; 23 | char* buf = new char[len]; 24 | fread( buf, len, 1, temp ); 25 | fclose( temp ); 26 | return buf; 27 | } 28 | 29 | void fsave( void* buf, unsigned int len, char* fname ) 30 | { 31 | FILE* temp = fopen(fname,"wb"); 32 | fwrite( buf, len, 1, temp ); 33 | fclose( temp ); 34 | } 35 | 36 | uint fgetd( FILE* file) 37 | { 38 | return fgetc(file)+(fgetc(file)<<8)+(fgetc(file)<<16)+(fgetc(file)<<24); 39 | } 40 | 41 | uint fgetw( FILE* file) 42 | { 43 | return fgetc(file)+(fgetc(file)<<8); 44 | } 45 | 46 | void fputd( uint c, FILE* file ) 47 | { 48 | fputc( c , file ); 49 | fputc( c>> 8, file ); 50 | fputc( c>>16, file ); 51 | fputc( c>>24, file ); 52 | } 53 | 54 | 55 | void fputw( uint c, FILE* file ) 56 | { 57 | fputc( c , file ); 58 | fputc( c>> 8, file ); 59 | } 60 | #define Psh(c) ( c==0x06 || c==0x16 || c==0x1E || (c>0x4F && c<0x58) ) 61 | 62 | #define wswap(a) ( ((a)>>8) + (((a)&255)<<8) ) 63 | #define bswap(a) ( wswap((a)>>16)+(wswap((a)&65535)<<16) ) 64 | //#define _bsw(a,i,h) 
(((uc(&)[4])(a))[i]<<(h)) 65 | //#define bswap(a) ( _bsw(a,0,24)+_bsw(a,1,16)+_bsw(a,2,8)+_bsw(a,3,0) ) 66 | 67 | #include 68 | std::map cofs; 69 | std::map jofs; 70 | #define mask 0xffffe000 71 | #define shift 0x1000 72 | int main(int argc,char* argv[]) 73 | { 74 | int cn=0; 75 | int jn=0; 76 | int i,j,k,len; uint a,b; 77 | 78 | uc* p; uc* q; 79 | FILE* Codes = fopen("main.dat","wb"); 80 | FILE* Calls = fopen("calls.dat","wb"); 81 | // FILE* Calls2 = fopen("calls2.dat","wb"); 82 | FILE* Jumps = fopen("jumps.dat","wb"); 83 | FILE* Flags = fopen("flags.dat","wb"); 84 | 85 | uc* Text = (uc*)fload(argv[1]); p=Text; 86 | 87 | for( i=0; i1 ) use=1; 141 | if ( a0 && p[a-1]==0xc3 ) use=1; 143 | 144 | if ( use ) { 145 | putc( 0x00, Flags ); 146 | 147 | fputd( bswap(a), Calls ); 148 | 149 | // if ( p[i+5]==0x83 && p[i+6]==0xC4 && p[i+7]>0 ) { 150 | // fputc( 1/*p[i+7]*/, Calls2 ); 151 | // i+=2;//3; 152 | // } else { 153 | // fputc( 0x00, Calls2 ); 154 | // } 155 | 156 | i+=4; 157 | } else { 158 | putc( 0x01, Flags ); 159 | } 160 | // } 161 | } 162 | 163 | if ( p[i]==0xE9 && i<=BytesLoaded-5-3 ) { 164 | a = i+5 + (uint&)p[i+1]; 165 | //if (a1 ) use=1; 169 | if ( a1 ) use=1; 186 | if ( a