├── .idea
├── modules.xml
└── vcs.xml
├── LICENSE
├── README.MD
├── lithestring.iml
└── src
└── lithe
└── core
└── LitheString.java
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU LESSER GENERAL PUBLIC LICENSE
2 | Version 2.1, February 1999
3 |
4 | Copyright (C) 1991, 1999 Free Software Foundation, Inc.
5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6 | Everyone is permitted to copy and distribute verbatim copies
7 | of this license document, but changing it is not allowed.
8 |
9 | (This is the first released version of the Lesser GPL. It also counts
10 | as the successor of the GNU Library Public License, version 2, hence
11 | the version number 2.1.)
12 |
13 | Preamble
14 |
15 | The licenses for most software are designed to take away your
16 | freedom to share and change it. By contrast, the GNU General Public
17 | Licenses are intended to guarantee your freedom to share and change
18 | free software--to make sure the software is free for all its users.
19 |
20 | This license, the Lesser General Public License, applies to some
21 | specially designated software packages--typically libraries--of the
22 | Free Software Foundation and other authors who decide to use it. You
23 | can use it too, but we suggest you first think carefully about whether
24 | this license or the ordinary General Public License is the better
25 | strategy to use in any particular case, based on the explanations below.
26 |
27 | When we speak of free software, we are referring to freedom of use,
28 | not price. Our General Public Licenses are designed to make sure that
29 | you have the freedom to distribute copies of free software (and charge
30 | for this service if you wish); that you receive source code or can get
31 | it if you want it; that you can change the software and use pieces of
32 | it in new free programs; and that you are informed that you can do
33 | these things.
34 |
35 | To protect your rights, we need to make restrictions that forbid
36 | distributors to deny you these rights or to ask you to surrender these
37 | rights. These restrictions translate to certain responsibilities for
38 | you if you distribute copies of the library or if you modify it.
39 |
40 | For example, if you distribute copies of the library, whether gratis
41 | or for a fee, you must give the recipients all the rights that we gave
42 | you. You must make sure that they, too, receive or can get the source
43 | code. If you link other code with the library, you must provide
44 | complete object files to the recipients, so that they can relink them
45 | with the library after making changes to the library and recompiling
46 | it. And you must show them these terms so they know their rights.
47 |
48 | We protect your rights with a two-step method: (1) we copyright the
49 | library, and (2) we offer you this license, which gives you legal
50 | permission to copy, distribute and/or modify the library.
51 |
52 | To protect each distributor, we want to make it very clear that
53 | there is no warranty for the free library. Also, if the library is
54 | modified by someone else and passed on, the recipients should know
55 | that what they have is not the original version, so that the original
56 | author's reputation will not be affected by problems that might be
57 | introduced by others.
58 |
59 | Finally, software patents pose a constant threat to the existence of
60 | any free program. We wish to make sure that a company cannot
61 | effectively restrict the users of a free program by obtaining a
62 | restrictive license from a patent holder. Therefore, we insist that
63 | any patent license obtained for a version of the library must be
64 | consistent with the full freedom of use specified in this license.
65 |
66 | Most GNU software, including some libraries, is covered by the
67 | ordinary GNU General Public License. This license, the GNU Lesser
68 | General Public License, applies to certain designated libraries, and
69 | is quite different from the ordinary General Public License. We use
70 | this license for certain libraries in order to permit linking those
71 | libraries into non-free programs.
72 |
73 | When a program is linked with a library, whether statically or using
74 | a shared library, the combination of the two is legally speaking a
75 | combined work, a derivative of the original library. The ordinary
76 | General Public License therefore permits such linking only if the
77 | entire combination fits its criteria of freedom. The Lesser General
78 | Public License permits more lax criteria for linking other code with
79 | the library.
80 |
81 | We call this license the "Lesser" General Public License because it
82 | does Less to protect the user's freedom than the ordinary General
83 | Public License. It also provides other free software developers Less
84 | of an advantage over competing non-free programs. These disadvantages
85 | are the reason we use the ordinary General Public License for many
86 | libraries. However, the Lesser license provides advantages in certain
87 | special circumstances.
88 |
89 | For example, on rare occasions, there may be a special need to
90 | encourage the widest possible use of a certain library, so that it becomes
91 | a de-facto standard. To achieve this, non-free programs must be
92 | allowed to use the library. A more frequent case is that a free
93 | library does the same job as widely used non-free libraries. In this
94 | case, there is little to gain by limiting the free library to free
95 | software only, so we use the Lesser General Public License.
96 |
97 | In other cases, permission to use a particular library in non-free
98 | programs enables a greater number of people to use a large body of
99 | free software. For example, permission to use the GNU C Library in
100 | non-free programs enables many more people to use the whole GNU
101 | operating system, as well as its variant, the GNU/Linux operating
102 | system.
103 |
104 | Although the Lesser General Public License is Less protective of the
105 | users' freedom, it does ensure that the user of a program that is
106 | linked with the Library has the freedom and the wherewithal to run
107 | that program using a modified version of the Library.
108 |
109 | The precise terms and conditions for copying, distribution and
110 | modification follow. Pay close attention to the difference between a
111 | "work based on the library" and a "work that uses the library". The
112 | former contains code derived from the library, whereas the latter must
113 | be combined with the library in order to run.
114 |
115 | GNU LESSER GENERAL PUBLIC LICENSE
116 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
117 |
118 | 0. This License Agreement applies to any software library or other
119 | program which contains a notice placed by the copyright holder or
120 | other authorized party saying it may be distributed under the terms of
121 | this Lesser General Public License (also called "this License").
122 | Each licensee is addressed as "you".
123 |
124 | A "library" means a collection of software functions and/or data
125 | prepared so as to be conveniently linked with application programs
126 | (which use some of those functions and data) to form executables.
127 |
128 | The "Library", below, refers to any such software library or work
129 | which has been distributed under these terms. A "work based on the
130 | Library" means either the Library or any derivative work under
131 | copyright law: that is to say, a work containing the Library or a
132 | portion of it, either verbatim or with modifications and/or translated
133 | straightforwardly into another language. (Hereinafter, translation is
134 | included without limitation in the term "modification".)
135 |
136 | "Source code" for a work means the preferred form of the work for
137 | making modifications to it. For a library, complete source code means
138 | all the source code for all modules it contains, plus any associated
139 | interface definition files, plus the scripts used to control compilation
140 | and installation of the library.
141 |
142 | Activities other than copying, distribution and modification are not
143 | covered by this License; they are outside its scope. The act of
144 | running a program using the Library is not restricted, and output from
145 | such a program is covered only if its contents constitute a work based
146 | on the Library (independent of the use of the Library in a tool for
147 | writing it). Whether that is true depends on what the Library does
148 | and what the program that uses the Library does.
149 |
150 | 1. You may copy and distribute verbatim copies of the Library's
151 | complete source code as you receive it, in any medium, provided that
152 | you conspicuously and appropriately publish on each copy an
153 | appropriate copyright notice and disclaimer of warranty; keep intact
154 | all the notices that refer to this License and to the absence of any
155 | warranty; and distribute a copy of this License along with the
156 | Library.
157 |
158 | You may charge a fee for the physical act of transferring a copy,
159 | and you may at your option offer warranty protection in exchange for a
160 | fee.
161 |
162 | 2. You may modify your copy or copies of the Library or any portion
163 | of it, thus forming a work based on the Library, and copy and
164 | distribute such modifications or work under the terms of Section 1
165 | above, provided that you also meet all of these conditions:
166 |
167 | a) The modified work must itself be a software library.
168 |
169 | b) You must cause the files modified to carry prominent notices
170 | stating that you changed the files and the date of any change.
171 |
172 | c) You must cause the whole of the work to be licensed at no
173 | charge to all third parties under the terms of this License.
174 |
175 | d) If a facility in the modified Library refers to a function or a
176 | table of data to be supplied by an application program that uses
177 | the facility, other than as an argument passed when the facility
178 | is invoked, then you must make a good faith effort to ensure that,
179 | in the event an application does not supply such function or
180 | table, the facility still operates, and performs whatever part of
181 | its purpose remains meaningful.
182 |
183 | (For example, a function in a library to compute square roots has
184 | a purpose that is entirely well-defined independent of the
185 | application. Therefore, Subsection 2d requires that any
186 | application-supplied function or table used by this function must
187 | be optional: if the application does not supply it, the square
188 | root function must still compute square roots.)
189 |
190 | These requirements apply to the modified work as a whole. If
191 | identifiable sections of that work are not derived from the Library,
192 | and can be reasonably considered independent and separate works in
193 | themselves, then this License, and its terms, do not apply to those
194 | sections when you distribute them as separate works. But when you
195 | distribute the same sections as part of a whole which is a work based
196 | on the Library, the distribution of the whole must be on the terms of
197 | this License, whose permissions for other licensees extend to the
198 | entire whole, and thus to each and every part regardless of who wrote
199 | it.
200 |
201 | Thus, it is not the intent of this section to claim rights or contest
202 | your rights to work written entirely by you; rather, the intent is to
203 | exercise the right to control the distribution of derivative or
204 | collective works based on the Library.
205 |
206 | In addition, mere aggregation of another work not based on the Library
207 | with the Library (or with a work based on the Library) on a volume of
208 | a storage or distribution medium does not bring the other work under
209 | the scope of this License.
210 |
211 | 3. You may opt to apply the terms of the ordinary GNU General Public
212 | License instead of this License to a given copy of the Library. To do
213 | this, you must alter all the notices that refer to this License, so
214 | that they refer to the ordinary GNU General Public License, version 2,
215 | instead of to this License. (If a newer version than version 2 of the
216 | ordinary GNU General Public License has appeared, then you can specify
217 | that version instead if you wish.) Do not make any other change in
218 | these notices.
219 |
220 | Once this change is made in a given copy, it is irreversible for
221 | that copy, so the ordinary GNU General Public License applies to all
222 | subsequent copies and derivative works made from that copy.
223 |
224 | This option is useful when you wish to copy part of the code of
225 | the Library into a program that is not a library.
226 |
227 | 4. You may copy and distribute the Library (or a portion or
228 | derivative of it, under Section 2) in object code or executable form
229 | under the terms of Sections 1 and 2 above provided that you accompany
230 | it with the complete corresponding machine-readable source code, which
231 | must be distributed under the terms of Sections 1 and 2 above on a
232 | medium customarily used for software interchange.
233 |
234 | If distribution of object code is made by offering access to copy
235 | from a designated place, then offering equivalent access to copy the
236 | source code from the same place satisfies the requirement to
237 | distribute the source code, even though third parties are not
238 | compelled to copy the source along with the object code.
239 |
240 | 5. A program that contains no derivative of any portion of the
241 | Library, but is designed to work with the Library by being compiled or
242 | linked with it, is called a "work that uses the Library". Such a
243 | work, in isolation, is not a derivative work of the Library, and
244 | therefore falls outside the scope of this License.
245 |
246 | However, linking a "work that uses the Library" with the Library
247 | creates an executable that is a derivative of the Library (because it
248 | contains portions of the Library), rather than a "work that uses the
249 | library". The executable is therefore covered by this License.
250 | Section 6 states terms for distribution of such executables.
251 |
252 | When a "work that uses the Library" uses material from a header file
253 | that is part of the Library, the object code for the work may be a
254 | derivative work of the Library even though the source code is not.
255 | Whether this is true is especially significant if the work can be
256 | linked without the Library, or if the work is itself a library. The
257 | threshold for this to be true is not precisely defined by law.
258 |
259 | If such an object file uses only numerical parameters, data
260 | structure layouts and accessors, and small macros and small inline
261 | functions (ten lines or less in length), then the use of the object
262 | file is unrestricted, regardless of whether it is legally a derivative
263 | work. (Executables containing this object code plus portions of the
264 | Library will still fall under Section 6.)
265 |
266 | Otherwise, if the work is a derivative of the Library, you may
267 | distribute the object code for the work under the terms of Section 6.
268 | Any executables containing that work also fall under Section 6,
269 | whether or not they are linked directly with the Library itself.
270 |
271 | 6. As an exception to the Sections above, you may also combine or
272 | link a "work that uses the Library" with the Library to produce a
273 | work containing portions of the Library, and distribute that work
274 | under terms of your choice, provided that the terms permit
275 | modification of the work for the customer's own use and reverse
276 | engineering for debugging such modifications.
277 |
278 | You must give prominent notice with each copy of the work that the
279 | Library is used in it and that the Library and its use are covered by
280 | this License. You must supply a copy of this License. If the work
281 | during execution displays copyright notices, you must include the
282 | copyright notice for the Library among them, as well as a reference
283 | directing the user to the copy of this License. Also, you must do one
284 | of these things:
285 |
286 | a) Accompany the work with the complete corresponding
287 | machine-readable source code for the Library including whatever
288 | changes were used in the work (which must be distributed under
289 | Sections 1 and 2 above); and, if the work is an executable linked
290 | with the Library, with the complete machine-readable "work that
291 | uses the Library", as object code and/or source code, so that the
292 | user can modify the Library and then relink to produce a modified
293 | executable containing the modified Library. (It is understood
294 | that the user who changes the contents of definitions files in the
295 | Library will not necessarily be able to recompile the application
296 | to use the modified definitions.)
297 |
298 | b) Use a suitable shared library mechanism for linking with the
299 | Library. A suitable mechanism is one that (1) uses at run time a
300 | copy of the library already present on the user's computer system,
301 | rather than copying library functions into the executable, and (2)
302 | will operate properly with a modified version of the library, if
303 | the user installs one, as long as the modified version is
304 | interface-compatible with the version that the work was made with.
305 |
306 | c) Accompany the work with a written offer, valid for at
307 | least three years, to give the same user the materials
308 | specified in Subsection 6a, above, for a charge no more
309 | than the cost of performing this distribution.
310 |
311 | d) If distribution of the work is made by offering access to copy
312 | from a designated place, offer equivalent access to copy the above
313 | specified materials from the same place.
314 |
315 | e) Verify that the user has already received a copy of these
316 | materials or that you have already sent this user a copy.
317 |
318 | For an executable, the required form of the "work that uses the
319 | Library" must include any data and utility programs needed for
320 | reproducing the executable from it. However, as a special exception,
321 | the materials to be distributed need not include anything that is
322 | normally distributed (in either source or binary form) with the major
323 | components (compiler, kernel, and so on) of the operating system on
324 | which the executable runs, unless that component itself accompanies
325 | the executable.
326 |
327 | It may happen that this requirement contradicts the license
328 | restrictions of other proprietary libraries that do not normally
329 | accompany the operating system. Such a contradiction means you cannot
330 | use both them and the Library together in an executable that you
331 | distribute.
332 |
333 | 7. You may place library facilities that are a work based on the
334 | Library side-by-side in a single library together with other library
335 | facilities not covered by this License, and distribute such a combined
336 | library, provided that the separate distribution of the work based on
337 | the Library and of the other library facilities is otherwise
338 | permitted, and provided that you do these two things:
339 |
340 | a) Accompany the combined library with a copy of the same work
341 | based on the Library, uncombined with any other library
342 | facilities. This must be distributed under the terms of the
343 | Sections above.
344 |
345 | b) Give prominent notice with the combined library of the fact
346 | that part of it is a work based on the Library, and explaining
347 | where to find the accompanying uncombined form of the same work.
348 |
349 | 8. You may not copy, modify, sublicense, link with, or distribute
350 | the Library except as expressly provided under this License. Any
351 | attempt otherwise to copy, modify, sublicense, link with, or
352 | distribute the Library is void, and will automatically terminate your
353 | rights under this License. However, parties who have received copies,
354 | or rights, from you under this License will not have their licenses
355 | terminated so long as such parties remain in full compliance.
356 |
357 | 9. You are not required to accept this License, since you have not
358 | signed it. However, nothing else grants you permission to modify or
359 | distribute the Library or its derivative works. These actions are
360 | prohibited by law if you do not accept this License. Therefore, by
361 | modifying or distributing the Library (or any work based on the
362 | Library), you indicate your acceptance of this License to do so, and
363 | all its terms and conditions for copying, distributing or modifying
364 | the Library or works based on it.
365 |
366 | 10. Each time you redistribute the Library (or any work based on the
367 | Library), the recipient automatically receives a license from the
368 | original licensor to copy, distribute, link with or modify the Library
369 | subject to these terms and conditions. You may not impose any further
370 | restrictions on the recipients' exercise of the rights granted herein.
371 | You are not responsible for enforcing compliance by third parties with
372 | this License.
373 |
374 | 11. If, as a consequence of a court judgment or allegation of patent
375 | infringement or for any other reason (not limited to patent issues),
376 | conditions are imposed on you (whether by court order, agreement or
377 | otherwise) that contradict the conditions of this License, they do not
378 | excuse you from the conditions of this License. If you cannot
379 | distribute so as to satisfy simultaneously your obligations under this
380 | License and any other pertinent obligations, then as a consequence you
381 | may not distribute the Library at all. For example, if a patent
382 | license would not permit royalty-free redistribution of the Library by
383 | all those who receive copies directly or indirectly through you, then
384 | the only way you could satisfy both it and this License would be to
385 | refrain entirely from distribution of the Library.
386 |
387 | If any portion of this section is held invalid or unenforceable under any
388 | particular circumstance, the balance of the section is intended to apply,
389 | and the section as a whole is intended to apply in other circumstances.
390 |
391 | It is not the purpose of this section to induce you to infringe any
392 | patents or other property right claims or to contest validity of any
393 | such claims; this section has the sole purpose of protecting the
394 | integrity of the free software distribution system which is
395 | implemented by public license practices. Many people have made
396 | generous contributions to the wide range of software distributed
397 | through that system in reliance on consistent application of that
398 | system; it is up to the author/donor to decide if he or she is willing
399 | to distribute software through any other system and a licensee cannot
400 | impose that choice.
401 |
402 | This section is intended to make thoroughly clear what is believed to
403 | be a consequence of the rest of this License.
404 |
405 | 12. If the distribution and/or use of the Library is restricted in
406 | certain countries either by patents or by copyrighted interfaces, the
407 | original copyright holder who places the Library under this License may add
408 | an explicit geographical distribution limitation excluding those countries,
409 | so that distribution is permitted only in or among countries not thus
410 | excluded. In such case, this License incorporates the limitation as if
411 | written in the body of this License.
412 |
413 | 13. The Free Software Foundation may publish revised and/or new
414 | versions of the Lesser General Public License from time to time.
415 | Such new versions will be similar in spirit to the present version,
416 | but may differ in detail to address new problems or concerns.
417 |
418 | Each version is given a distinguishing version number. If the Library
419 | specifies a version number of this License which applies to it and
420 | "any later version", you have the option of following the terms and
421 | conditions either of that version or of any later version published by
422 | the Free Software Foundation. If the Library does not specify a
423 | license version number, you may choose any version ever published by
424 | the Free Software Foundation.
425 |
426 | 14. If you wish to incorporate parts of the Library into other free
427 | programs whose distribution conditions are incompatible with these,
428 | write to the author to ask for permission. For software which is
429 | copyrighted by the Free Software Foundation, write to the Free
430 | Software Foundation; we sometimes make exceptions for this. Our
431 | decision will be guided by the two goals of preserving the free status
432 | of all derivatives of our free software and of promoting the sharing
433 | and reuse of software generally.
434 |
435 | NO WARRANTY
436 |
437 | 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
438 | WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
439 | EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
440 | OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
441 | KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
442 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
443 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
444 | LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
445 | THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
446 |
447 | 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
448 | WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
449 | AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
450 | FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
451 | CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
452 | LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
453 | RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
454 | FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
455 | SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
456 | DAMAGES.
457 |
458 | END OF TERMS AND CONDITIONS
459 |
460 | How to Apply These Terms to Your New Libraries
461 |
462 | If you develop a new library, and you want it to be of the greatest
463 | possible use to the public, we recommend making it free software that
464 | everyone can redistribute and change. You can do so by permitting
465 | redistribution under these terms (or, alternatively, under the terms of the
466 | ordinary General Public License).
467 |
468 | To apply these terms, attach the following notices to the library. It is
469 | safest to attach them to the start of each source file to most effectively
470 | convey the exclusion of warranty; and each file should have at least the
471 | "copyright" line and a pointer to where the full notice is found.
472 |
473 | {description}
474 | Copyright (C) {year} {fullname}
475 |
476 | This library is free software; you can redistribute it and/or
477 | modify it under the terms of the GNU Lesser General Public
478 | License as published by the Free Software Foundation; either
479 | version 2.1 of the License, or (at your option) any later version.
480 |
481 | This library is distributed in the hope that it will be useful,
482 | but WITHOUT ANY WARRANTY; without even the implied warranty of
483 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
484 | Lesser General Public License for more details.
485 |
486 | You should have received a copy of the GNU Lesser General Public
487 | License along with this library; if not, write to the Free Software
488 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
489 | USA
490 |
491 | Also add information on how to contact you by electronic and paper mail.
492 |
493 | You should also get your employer (if you work as a programmer) or your
494 | school, if any, to sign a "copyright disclaimer" for the library, if
495 | necessary. Here is a sample; alter the names:
496 |
497 | Yoyodyne, Inc., hereby disclaims all copyright interest in the
498 | library `Frob' (a library for tweaking knobs) written by James Random
499 | Hacker.
500 |
501 | {signature of Ty Coon}, 1 April 1990
502 | Ty Coon, President of Vice
503 |
504 | That's all there is to it!
--------------------------------------------------------------------------------
/README.MD:
--------------------------------------------------------------------------------
1 | # lithestring
2 |
3 | ## Synopsis
4 |
5 | Java class to compress short (or long) strings
6 |
7 | Included in the lithecore library https://github.com/lithedream/lithecore
8 |
9 | ## Motivation
10 |
11 | I dreamed up a compression algorithm especially useful for short strings, without fear of taking up more space than the UTF-8 encoding of the original string.
12 |
13 | The compression algorithm chooses the best approach between:
14 | * Plain UTF-8 encoding (as is, without overhead)
15 | * An encoding which uses 5 bits for a-z, A-Z, space, and encodes every other UTF-8 character with 3 bits of overhead (for really short Latin strings)
16 | * An intermediate algorithm based on Huffman encoding (dictionary header, then encoded string)
17 | * 1 byte of overhead then GZIP compression (for long strings)
18 |
19 | The decompression algorithm checks whether the data is a plain UTF-8 encoding or a compressed one; in the latter case it reads the data header to apply the correct decoding algorithm.
20 |
21 | ## Code Example
22 |
23 | ```java
24 | String input = ...;
25 | byte[] compressed = LitheString.zip(input); // in the worst case, compressed is the plain UTF-8 encoding of input
26 | String uncompressed = LitheString.unzip(compressed);
27 |
28 | if (input.equals(uncompressed)){
29 | System.out.println("It works!");
30 | } else {
31 | System.out.println("Please submit a bug for "+input);
32 | }
33 | ```
34 |
35 | ## Author
36 |
37 | * **lithedream**
38 |
39 | ## License
40 |
41 | LGPL-2.1
42 |
--------------------------------------------------------------------------------
/lithestring.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/src/lithe/core/LitheString.java:
--------------------------------------------------------------------------------
1 | package lithe.core;
2 |
3 | import java.io.*;
4 | import java.nio.charset.StandardCharsets;
5 | import java.util.*;
6 | import java.util.zip.GZIPInputStream;
7 | import java.util.zip.GZIPOutputStream;
8 |
9 |
10 | public class LitheString {
11 |
12 | private byte[] content;
13 |
/**
 * Creates an instance holding the zipped form of the given string.
 *
 * @param input the string to store; it is passed to {@code zip} and only the resulting bytes are kept
 */
public LitheString(String input) {
    content = LitheString.zip(input);
}
17 |
18 | @Override
19 | public boolean equals(Object obj) {
20 | return this == obj || (obj instanceof LitheString && Arrays.equals(((LitheString) obj).content, this.content));
21 | }
22 |
23 | @Override
24 | public int hashCode() {
25 | return Arrays.hashCode(content);
26 | }
27 |
28 | /**
29 | * Returns the zipped byte[] content
30 | *
31 | * @return the zipped byte[] content
32 | */
33 | public byte[] getBytes() {
34 | return content;
35 | }
36 |
37 | /**
38 | * Returns the corresponding String content
39 | *
40 | * @return the corresponding String content
41 | */
42 | public String getString() {
43 | return unzip(content);
44 | }
45 |
46 | /**
47 | * Compresses the string as best as it can
48 | *
49 | * @param input
50 | * @return the compressed byte[]
51 | */
52 | public static byte[] zip(String input) {
53 | byte[] z0 = input.getBytes(StandardCharsets.UTF_8);
54 | return zipUTF8(z0);
55 | }
56 |
57 | /**
58 | * Compresses the string already in UTF-8 form as best as it can
59 | *
60 | * @param utf8Input
61 | * @return the compressed byte[]
62 | */
63 | public static byte[] zipUTF8(byte[] utf8Input) {
64 | byte[] z1 = z1UTF8(utf8Input);
65 | byte[] z2 = z2UTF8(utf8Input);
66 | byte[] z3 = z3UTF8(utf8Input);
67 |
68 | return shortest(utf8Input, z1, z2, z3);
69 | }
70 |
71 | /**
72 | * Compresses the string and checks if the encoding is correct, throwing exception if it didn't work
73 | *
74 | * @param input
75 | * @return the compressed byte[]
76 | */
77 | public static byte[] secureZip(String input) {
78 | byte[] zipped = zip(input);
79 | String unzipped = unzip(zipped);
80 | if (!input.equals(unzipped)) {
81 | throw new IllegalArgumentException("Error in encoding String '" + (input.length() > 100 ? input.substring(0, 100) + "..." : input) + "'");
82 | }
83 | return zipped;
84 | }
85 |
86 | /**
87 | * Compresses the string using a custom encoding with 5 bits for a-z and space charactes, and adds 3 bits to every other UTF-8 character
88 | *
89 | * @param input
90 | * @return the compressed byte[]
91 | */
92 | public static byte[] z1(String input) {
93 | return z1UTF8(input.getBytes(StandardCharsets.UTF_8));
94 | }
95 |
96 | /**
97 | * Like z1, but with an UTF-8 encoded string as input
98 | *
99 | * @param utf8Input
100 | * @return the compressed byte[]
101 | */
102 | public static byte[] z1UTF8(byte[] utf8Input) {
103 | BitWriter output = new BitWriter();
104 | output.write01("100");
105 |
106 | boolean caps = false;
107 | try (ByteArrayInputStream bais = new ByteArrayInputStream(utf8Input)) {
108 | int in;
109 | while ((in = bais.read()) != -1) {
110 | byte byt = (byte) in;
111 | int nExtraByte = getNExtraBytes(byt);
112 | for (int i = 0; i < nExtraByte; i++) {
113 | bais.read();
114 | }
115 | if (byt >= 97 && byt <= 122) { // lower
116 | caps = false;
117 | break;
118 |
119 | }
120 | if (byt >= 65 && byt <= 90) { // upper
121 | caps = true;
122 | break;
123 | }
124 | }
125 | } catch (IOException e) {
126 |
127 | }
128 | output.write(caps);
129 |
130 | try (PushbackInputStream bais = new PushbackInputStream(new ByteArrayInputStream(utf8Input))) {
131 | int in;
132 | while ((in = bais.read()) != -1) {
133 | byte byt = (byte) in;
134 | if (byt >= 97 && byt <= 122) { // lower
135 | if (caps) {
136 | int in2 = bais.read();
137 | if (in2 != -1) {
138 | bais.unread(in2); // push it back
139 | if (in2 >= 65 && in2 <= 90) { // if the next is upper
140 | output.write01("111"); // write this in utf8
141 | output.write(byt);
142 | continue;
143 | }
144 | }
145 | output.write01("00000");
146 | caps = !caps;
147 | }
148 | output.writeLast5Bits((byte) (byt - 96));
149 |
150 | } else if (byt >= 65 && byt <= 90) { // upper
151 | if (!caps) {
152 | int in2 = bais.read();
153 | if (in2 != -1) {
154 | bais.unread(in2); // push it back
155 | if (in2 >= 97 && in2 <= 122) { // if the next is lower
156 | output.write01("111"); // write this in utf8
157 | output.write(byt);
158 | continue;
159 | }
160 | }
161 | output.write01("00000");
162 | caps = !caps;
163 | }
164 | output.writeLast5Bits((byte) (byt - 64));
165 |
166 | } else if (byt == 32) { //space
167 | output.write01("11011");
168 | } else {
169 | output.write01("111");
170 | output.write(byt);
171 | for (int i = 0; i < getNExtraBytes(byt); i++) {
172 | output.write((byte) bais.read());
173 | }
174 | }
175 | }
176 | } catch (IOException e) {
177 |
178 | }
179 | output.close();
180 | return output.toByteArray();
181 | }
182 |
183 | /**
184 | * Returns how many bytes are after this to complete the UTF-8 character
185 | *
186 | * @param byt
187 | * @return the number of extra bytes
188 | */
189 | private static int getNExtraBytes(byte byt) {
190 | int nExtraByte = 0;
191 | if (!startsWith(byt, "0")) { //1 byte
192 | if (startsWith(byt, "110")) { //2 byte
193 | nExtraByte = 1;
194 | } else if (startsWith(byt, "1110")) { //3 byte
195 | nExtraByte = 2;
196 | } else if (startsWith(byt, "11110")) { //4 byte
197 | nExtraByte = 3;
198 | }
199 | }
200 | return nExtraByte;
201 | }
202 |
203 | /**
204 | * Returns if byte parameter starts with the sequence of "010..." as written in binaryString
205 | *
206 | * @param byt
207 | * @param binaryString
208 | * @return if the 010... of binaryString match the start of byt
209 | */
210 | private static boolean startsWith(byte byt, String binaryString) {
211 | byte pos = 7;
212 | for (int i = 0; i < binaryString.length(); i++) {
213 | boolean iBitIsSet = binaryString.charAt(i) == ('1');
214 | boolean bitValue = getNBitValue(byt, pos);
215 | if (bitValue != iBitIsSet) {
216 | return false;
217 | }
218 | if (pos == 0) {
219 | break;
220 | } else {
221 | pos--;
222 | }
223 | }
224 | return true;
225 | }
226 |
227 | /**
228 | * Compresses the string using a modified Huffman encoding
229 | *
230 | * @param input
231 | * @return the compressed byte[]
232 | */
233 | public static byte[] z2(String input) {
234 | return z2UTF8(input.getBytes(StandardCharsets.UTF_8));
235 | }
236 |
237 | /**
238 | * Like z2, but with an UTF-8 encoded string as input
239 | *
240 | * @param input
241 | * @return the compressed byte[]
242 | */
243 | public static byte[] z2UTF8(byte[] input) {
244 | List listChars = new ArrayList<>();
245 | try (ByteArrayInputStream bais = new ByteArrayInputStream(input)) {
246 | int in;
247 | while ((in = bais.read()) != -1) {
248 | byte byt = (byte) in;
249 | int nExtraByte = getNExtraBytes(byt);
250 |
251 | byte[] utf8Bytes = new byte[nExtraByte + 1];
252 | utf8Bytes[0] = byt;
253 |
254 | for (int i = 0; i < nExtraByte; i++) {
255 | utf8Bytes[i + 1] = (byte) bais.read();
256 | }
257 | listChars.add(new UTF8Char(utf8Bytes));
258 | }
259 | } catch (IOException e) {
260 |
261 | }
262 |
263 | Map objectFreqs = Huffer.makeFreqs(listChars);
264 | {
265 | int howMany = 0;
266 | for (Iterator> it = objectFreqs.entrySet().iterator(); it.hasNext(); ) {
267 | Map.Entry next = it.next();
268 | if (next.getValue() == 1) {
269 | it.remove();
270 | howMany++;
271 | }
272 | }
273 | if (howMany > 0) {
274 | objectFreqs.put(UTF8Char.getInvalidChar(), howMany);
275 | }
276 | }
277 | Map huff = Huffer.makeMap(objectFreqs);
278 |
279 | List> listHuff = new ArrayList<>();
280 | for (Map.Entry e : huff.entrySet()) {
281 | listHuff.add(new AbstractMap.SimpleEntry(e.getKey(), e.getValue()));
282 | }
283 | Collections.sort(listHuff, new Comparator>() {
284 | @Override
285 | public int compare(Map.Entry o1, Map.Entry o2) {
286 | return Integer.compare(o1.getValue().length(), o2.getValue().length());
287 | }
288 | });
289 |
290 | BitWriter output = innerZ2(listChars, 0, huff, listHuff);
291 | int spareBits = output.getSpareBits();
292 | if (spareBits > 0) {
293 | output = innerZ2(listChars, spareBits, huff, listHuff);
294 | }
295 | output.close();
296 | return output.toByteArray();
297 | }
298 |
299 | /**
300 | * Inner workings of z2
301 | *
302 | * @param listChars
303 | * @param spareBits
304 | * @param huff
305 | * @param listHuff
306 | * @return
307 | */
308 | private static BitWriter innerZ2(List listChars, int spareBits, Map huff, List> listHuff) {
309 | BitWriter output = new BitWriter();
310 | output.write01("1010");
311 | for (int i = 0; i < spareBits; i++) {
312 | output.write01("0");
313 | }
314 | output.write01("1");
315 |
316 | int length = 0;
317 | for (Map.Entry e : listHuff) {
318 | int difference = e.getValue().length() - length;
319 | if (difference > 0) {
320 | for (int i = 0; i < difference; i++) {
321 | output.write01("0");
322 | }
323 | output.write01("1");
324 | length = e.getValue().length();
325 | } else {
326 | output.write01("10");
327 | }
328 | output.write01(e.getValue());
329 | if (e.getKey().isInvalid()) {
330 | output.write01("10");
331 | } else {
332 | for (int i = 0; i < e.getKey().getBytes().length; i++) {
333 | output.write(e.getKey().getBytes()[i]);
334 | }
335 | }
336 | }
337 | output.write01("11");
338 | for (UTF8Char c : listChars) {
339 | String s = huff.get(c);
340 | if (s != null) {
341 | output.write01(s);
342 |
343 | } else {
344 | output.write01(huff.get(UTF8Char.getInvalidChar()));
345 | for (byte b : c.getBytes()) {
346 | output.write(b);
347 | }
348 | }
349 | }
350 | return output;
351 | }
352 |
353 | /**
354 | * Compresses the string with 1 byte of header + standard gzip encoding of the UTF-8 content
355 | *
356 | * @param input
357 | * @return the compressed byte[]
358 | */
359 | public static byte[] z3(String input) {
360 | ByteArrayOutputStream output = new ByteArrayOutputStream();
361 | output.write(0b10111111);
362 | try {
363 | try (Writer writer = new OutputStreamWriter(new GZIPOutputStream(output), StandardCharsets.UTF_8)) {
364 | writer.write(input);
365 | }
366 | } catch (IOException e) {
367 | throw new IllegalArgumentException(e);
368 | } finally {
369 | try {
370 | output.close();
371 | } catch (IOException e) {
372 | throw new IllegalArgumentException(e);
373 | }
374 | }
375 |
376 | return output.toByteArray();
377 | }
378 |
379 | /**
380 | * Like z3, but with an UTF-8 encoded string as input
381 | *
382 | * @param input
383 | * @return the compressed byte[]
384 | */
385 | public static byte[] z3UTF8(byte[] input) {
386 | ByteArrayOutputStream output = new ByteArrayOutputStream();
387 | output.write(0b10111111);
388 | try {
389 | try (GZIPOutputStream writer = new GZIPOutputStream(output)) {
390 | writer.write(input, 0, input.length);
391 | }
392 | } catch (IOException e) {
393 | throw new IllegalArgumentException(e);
394 | } finally {
395 | try {
396 | output.close();
397 | } catch (IOException e) {
398 | throw new IllegalArgumentException(e);
399 | }
400 | }
401 | return output.toByteArray();
402 | }
403 |
404 | /**
405 | * Uncompresses the compressed content with the right algorithm
406 | *
407 | * @param content
408 | * @return the original string
409 | */
410 | public static String unzip(byte[] content) {
411 | if (content == null) {
412 | return null;
413 | }
414 | if (content.length == 0) {
415 | return "";
416 | }
417 | if (startsWith(content[0], "100")) {
418 | return unzip1(content);
419 | }
420 | if (startsWith(content[0], "1010")) {
421 | return unzip2(content);
422 | }
423 | if ((content[0] & 0xFF) == 0b10111111) {
424 | return unzip3(content);
425 | }
426 | return new String(content, StandardCharsets.UTF_8);
427 | }
428 |
429 | /**
430 | * Uncompresses the compressed content using type1 algorithm
431 | *
432 | * @param content
433 | * @return the original string
434 | */
435 | private static String unzip1(byte[] content) {
436 | BitReader bitReader = new BitReader(content);
437 | bitReader.advance(3);
438 |
439 | ByteArrayOutputStream baos = new ByteArrayOutputStream();
440 | boolean caps = bitReader.read(1) == 1;
441 | while (!bitReader.isClosed()) {
442 | if (bitReader.peek01("111")) {
443 | bitReader.advance(3);
444 | byte read = bitReader.read();
445 |
446 | baos.write(read);
447 | for (int i = 0; i < getNExtraBytes(read); i++) {
448 | baos.write(bitReader.read());
449 | }
450 | } else {
451 | byte read = bitReader.read(5);
452 | if (read == 0) {
453 | caps = !caps;
454 | } else if ((read & 0xFF) == 0b00011011) {
455 | baos.write(32); // space
456 | } else {
457 | baos.write(read + (caps ? 64 : 96)); // UPPER:lower
458 | }
459 | }
460 | }
461 | return new String(baos.toByteArray(), StandardCharsets.UTF_8);
462 | }
463 |
464 | /**
465 | * Uncompresses the compressed content using type2 algorithm
466 | *
467 | * @param content
468 | * @return the original string
469 | */
470 | private static String unzip2(byte[] content) {
471 | BitReader bitReader = new BitReader(content);
472 | bitReader.advance(4);
473 | while (bitReader.read(1) == 0) ;
474 |
475 | int keyLength = 0;
476 | Trie01 trie = new Trie01<>();
477 | Map mmm = new HashMap<>();
478 | while (!bitReader.isClosed()) {
479 | if (bitReader.peek01("0")) {
480 | while (bitReader.read(1) == 0) {
481 | keyLength++;
482 | }
483 | } else if (bitReader.peek01("10")) {
484 | bitReader.advance(2);
485 | } else if (bitReader.peek01("11")) {
486 | bitReader.advance(2);
487 | break;
488 | }
489 | String key = bitReader.readAsString(keyLength);
490 | if (bitReader.peek01("10")) {
491 | bitReader.advance(2);
492 | UTF8Char u = UTF8Char.getInvalidChar();
493 | trie.add(key, u);
494 | mmm.put(key, u.asString());
495 | } else {
496 | byte read = bitReader.read();
497 |
498 | int nExtraBytes = getNExtraBytes(read);
499 | byte[] bytes = new byte[nExtraBytes + 1];
500 | bytes[0] = read;
501 | for (int i = 0; i < nExtraBytes; i++) {
502 | bytes[i + 1] = bitReader.read();
503 | }
504 | UTF8Char u = new UTF8Char(bytes);
505 | trie.add(key, u);
506 | mmm.put(key, u.asString());
507 | }
508 | }
509 |
510 | ByteArrayOutputStream baos = new ByteArrayOutputStream();
511 | while (!bitReader.isClosed()) {
512 | Trie01.Scanner scanner = trie.scan(bitReader.read01Char());
513 | while (!scanner.hasValue()) {
514 | scanner.scan(bitReader.read01Char());
515 | }
516 | UTF8Char value = scanner.getValue();
517 | if (value.isInvalid()) {
518 | byte read = bitReader.read();
519 |
520 | int nExtraBytes = getNExtraBytes(read);
521 | byte[] bytes = new byte[nExtraBytes + 1];
522 | bytes[0] = read;
523 | for (int i = 0; i < nExtraBytes; i++) {
524 | bytes[i + 1] = bitReader.read();
525 | }
526 | UTF8Char u = new UTF8Char(bytes);
527 | for (byte b : u.getBytes()) {
528 | baos.write(b);
529 | }
530 | } else {
531 | for (byte b : value.getBytes()) {
532 | baos.write(b);
533 | }
534 | }
535 | }
536 | return new String(baos.toByteArray(), StandardCharsets.UTF_8);
537 | }
538 |
539 | /**
540 | * Uncompresses the compressed content using type3 algorithm
541 | *
542 | * @param content
543 | * @return the original string
544 | */
545 | private static String unzip3(byte[] content) {
546 | ByteArrayInputStream bais = new ByteArrayInputStream(content);
547 | bais.read();
548 | try (GZIPInputStream gis = new GZIPInputStream(bais)) {
549 | byte[] buffer = new byte[1024];
550 | ByteArrayOutputStream out = new ByteArrayOutputStream();
551 |
552 | int len;
553 | while ((len = gis.read(buffer)) > 0) {
554 | out.write(buffer, 0, len);
555 | }
556 |
557 | gis.close();
558 | out.close();
559 | return new String(out.toByteArray(), StandardCharsets.UTF_8);
560 | } catch (IOException e) {
561 | throw new IllegalArgumentException(e);
562 | }
563 | }
564 |
565 | /**
566 | * Returns the shortest between these byte[]
567 | *
568 | * @param bytes
569 | * @return the shortest byte[]
570 | */
571 | private static byte[] shortest(byte[]... bytes) {
572 | byte[] shortest = bytes[0];
573 | for (int i = 1; i < bytes.length; i++) {
574 | if (bytes[i].length < shortest.length) {
575 | shortest = bytes[i];
576 | }
577 | }
578 | return shortest;
579 | }
580 |
581 | /**
582 | * Returns the value of the n-th bit of the byte
583 | *
584 | * @param value
585 | * @param n
586 | * @return the value of the n-th bit of the byte
587 | */
588 | private static boolean getNBitValue(byte value, byte n) {
589 | return (value & (1 << n)) != 0;
590 | }
591 |
592 | private static class BitWriter {
593 | private static final int START = 7;
594 | private final ByteArrayOutputStream b = new ByteArrayOutputStream();
595 | private byte current = 0;
596 | private int curpos = START;
597 |
598 | private boolean closed = false;
599 |
600 | public BitWriter() {
601 | }
602 |
603 | public void write(boolean bit) {
604 | if (bit) {
605 | current |= 1 << curpos;
606 | } else {
607 | current &= ~(1 << curpos);
608 | }
609 | if (curpos == 0) {
610 | b.write(current);
611 | current = 0;
612 | curpos = START;
613 | } else {
614 | curpos--;
615 | }
616 | }
617 |
618 | public void write01(String binaryString) {
619 | char[] chars = binaryString.toCharArray();
620 | for (char c : chars) {
621 | write(c == '1');
622 | }
623 | }
624 |
625 | public void write(byte value) {
626 | for (byte bit = 8; bit-- > 0; ) {
627 | write(getNBitValue(value, bit));
628 | }
629 | }
630 |
631 | public void writeLast5Bits(byte value) {
632 | for (byte bit = 5; bit-- > 0; ) {
633 | write(getNBitValue(value, bit));
634 | }
635 | }
636 |
637 | public void close() {
638 | if (!closed) {
639 | closed = true;
640 | if (curpos != START) {
641 | b.write(current);
642 | }
643 | }
644 | }
645 |
646 | public int getSpareBits() {
647 | if (curpos == START) {
648 | return 0;
649 | }
650 | return curpos + 1;
651 | }
652 |
653 | public byte[] toByteArray() {
654 | if (closed || curpos == START) {
655 | return b.toByteArray();
656 | } else {
657 | byte[] bytes = b.toByteArray();
658 | byte[] bytes2 = new byte[bytes.length + 1];
659 | System.arraycopy(bytes, 0, bytes2, 0, bytes.length);
660 | bytes2[bytes.length] = current;
661 | return bytes2;
662 | }
663 | }
664 |
665 | }
666 |
667 | private static class BitReader {
668 | private static final byte START = 7;
669 | private byte curpos = START;
670 | private byte[] bytes;
671 | private int bytePos = 0;
672 |
673 | public BitReader(byte[] b) {
674 | bytes = b;
675 | }
676 |
677 | public byte read() {
678 | if (curpos == START) {
679 | if (bytePos >= bytes.length) {
680 | return 0; //force return 0 because the stream ends
681 | }
682 | byte toRet = bytes[bytePos];
683 | bytePos++;
684 | return toRet;
685 | } else {
686 | byte toRet = 0;
687 | for (int bit = 8; bit-- > 0; ) {
688 | if (bytePos >= bytes.length) {
689 | return 0; //force return 0 because the stream ends
690 | }
691 | if (getNBitValue(bytes[bytePos], curpos)) {
692 | toRet |= 1 << bit;
693 | } else {
694 | toRet &= ~(1 << bit);
695 | }
696 | if (curpos == 0) {
697 | bytePos++;
698 | curpos = START;
699 | } else {
700 | curpos--;
701 | }
702 | }
703 | return toRet;
704 |
705 | }
706 | }
707 |
708 | public byte read(int numberOfBits) {
709 | byte toRet = 0;
710 | for (int bit = numberOfBits; bit-- > 0; ) {
711 | if (bytePos >= bytes.length) {
712 | return 0; //force return 0 because the stream ends
713 | }
714 | if (getNBitValue(bytes[bytePos], curpos)) {
715 | toRet |= 1 << bit;
716 | } else {
717 | toRet &= ~(1 << bit);
718 | }
719 | if (curpos == 0) {
720 | bytePos++;
721 | curpos = START;
722 | } else {
723 | curpos--;
724 | }
725 | }
726 | return toRet;
727 | }
728 |
729 | public String readAsString(int numberOfBits) {
730 | StringBuilder sb = new StringBuilder();
731 | for (int bit = numberOfBits; bit-- > 0; ) {
732 | if (bytePos >= bytes.length) {
733 | return null; //force return null because the stream ends
734 | }
735 | if (getNBitValue(bytes[bytePos], curpos)) {
736 | sb.append('1');
737 | } else {
738 | sb.append('0');
739 | }
740 | if (curpos == 0) {
741 | bytePos++;
742 | curpos = START;
743 | } else {
744 | curpos--;
745 | }
746 | }
747 | return sb.toString();
748 | }
749 |
750 | public char read01Char() {
751 | if (bytePos >= bytes.length) {
752 | return 0; //force return 0 because the stream ends
753 | }
754 | char c;
755 | if (getNBitValue(bytes[bytePos], curpos)) {
756 | c = '1';
757 | } else {
758 | c = '0';
759 | }
760 | if (curpos == 0) {
761 | bytePos++;
762 | curpos = START;
763 | } else {
764 | curpos--;
765 | }
766 | return c;
767 | }
768 |
769 |
770 | public boolean isClosed() {
771 | return bytePos >= bytes.length;
772 | }
773 |
774 | public void advance(int n) {
775 | for (int i = 0; i < n; i++) {
776 | if (curpos == 0) {
777 | bytePos++;
778 | curpos = START;
779 | } else {
780 | curpos--;
781 | }
782 | }
783 | }
784 |
785 | public boolean peek01(String s) {
786 | int bytePosCopy = bytePos;
787 | byte curPosCopy = curpos;
788 |
789 | for (int i = 0; i < s.length(); i++) {
790 | if (bytePosCopy >= bytes.length) {
791 | return false;
792 | }
793 | boolean bitValue = getNBitValue(bytes[bytePosCopy], curPosCopy);
794 | boolean iBitIsSet = s.charAt(i) == ('1');
795 | if (bitValue != iBitIsSet) {
796 | return false;
797 | }
798 |
799 | if (curPosCopy == 0) {
800 | bytePosCopy++;
801 | curPosCopy = START;
802 | } else {
803 | curPosCopy--;
804 | }
805 | }
806 | return true;
807 | }
808 | }
809 |
810 | private static class Huffer {
811 |
812 | private static class HuffmanTree implements Comparable> {
813 |
814 | public final int freq;
815 |
816 | public final HuffmanTree l, r;
817 |
818 | public final T value;
819 |
820 | private final boolean isLeaf;
821 |
822 | public HuffmanTree(HuffmanTree l, HuffmanTree r) {
823 | freq = l.freq + r.freq;
824 | this.l = l;
825 | this.r = r;
826 | value = null;
827 | isLeaf = false;
828 | }
829 |
830 | public HuffmanTree(int freq, T value) {
831 | this.freq = freq;
832 | this.value = value;
833 | l = null;
834 | r = null;
835 | isLeaf = true;
836 | }
837 |
838 | public int compareTo(HuffmanTree o) {
839 | return freq - o.freq;
840 | }
841 |
842 | }
843 |
844 | private static void toMap(HuffmanTree tree, StringBuilder prefix, Map t) {
845 | if (tree.isLeaf) {
846 | t.put(tree.value, prefix.toString());
847 | } else {
848 | prefix.append('0');
849 | toMap(tree.l, prefix, t);
850 | prefix.deleteCharAt(prefix.length() - 1);
851 |
852 | prefix.append('1');
853 | toMap(tree.r, prefix, t);
854 | prefix.deleteCharAt(prefix.length() - 1);
855 | }
856 | }
857 |
858 | private static HuffmanTree makeHuffmanTree(Map objFreqs) {
859 | PriorityQueue> huffmanTrees = new PriorityQueue>();
860 | for (Map.Entry entry : objFreqs.entrySet()) {
861 | huffmanTrees.offer(new HuffmanTree(entry.getValue(), entry.getKey()));
862 | }
863 | while (huffmanTrees.size() > 1) {
864 | HuffmanTree l = huffmanTrees.poll();
865 | HuffmanTree r = huffmanTrees.poll();
866 | huffmanTrees.offer(new HuffmanTree(l, r));
867 | }
868 | return huffmanTrees.poll();
869 | }
870 |
871 | public static Map huff(Collection input) {
872 | Map objectFreqs = makeFreqs(input);
873 | return makeMap(objectFreqs);
874 | }
875 |
876 | private static Map makeFreqs(Collection input) {
877 | Map objectFreqs = new HashMap<>();
878 | for (T obj : input) {
879 | Integer count = objectFreqs.get(obj);
880 | objectFreqs.put(obj, count == null ? 1 : count + 1);
881 | }
882 | return objectFreqs;
883 | }
884 |
885 | private static Map makeMap(Map objectFreqs) {
886 | Map map = new LinkedHashMap();
887 | if (objectFreqs.size() == 1) {
888 | map.put(objectFreqs.entrySet().iterator().next().getKey(), "1");
889 | } else {
890 | toMap(makeHuffmanTree(objectFreqs), new StringBuilder(), map);
891 | }
892 | return map;
893 | }
894 |
895 | }
896 |
897 | private static class UTF8Char {
898 | private static UTF8Char invalidChar = null;
899 | private final byte[] bytes;
900 |
901 | public UTF8Char(byte[] bytes) {
902 | this.bytes = bytes;
903 | }
904 |
905 | @Override
906 | public boolean equals(Object obj) {
907 | if (obj == null || !(obj instanceof UTF8Char)) {
908 | return false;
909 | }
910 | return Arrays.equals(this.bytes, ((UTF8Char) obj).bytes);
911 | }
912 |
913 | public byte[] getBytes() {
914 | return bytes;
915 | }
916 |
917 |
918 | public byte getFirst() {
919 | return bytes[0];
920 | }
921 |
922 | @Override
923 | public int hashCode() {
924 | if (bytes.length == 1) {
925 | return bytes[0];
926 | } else if (bytes.length == 2) {
927 | return ((0xFF & bytes[1]) << 8) | (0xFF & bytes[0]);
928 | } else if (bytes.length == 3) {
929 | return ((0xFF & bytes[2]) << 16) | ((0xFF & bytes[1]) << 8) | (0xFF & bytes[0]);
930 | } else {
931 | return ((0xFF & bytes[3]) << 24) | ((0xFF & bytes[2]) << 16) | ((0xFF & bytes[1]) << 8) | (0xFF & bytes[0]);
932 | }
933 | }
934 |
935 | public String asString() {
936 | return new String(bytes, StandardCharsets.UTF_8);
937 | }
938 |
939 | @Override
940 | public String toString() {
941 | return "UTF8Char:[" + asString() + "][" + as01String() + "]";
942 | }
943 |
944 |
945 | public String as01String() {
946 | StringBuilder sb = new StringBuilder();
947 | for (byte b : bytes) {
948 | for (int bit = 8; bit-- > 0; ) {
949 | sb.append(((b & (1 << bit)) != 0) ? '1' : '0');
950 | }
951 | sb.append(' ');
952 | }
953 | return sb.toString();
954 | }
955 |
956 | public static UTF8Char getInvalidChar() {
957 | if (invalidChar == null) {
958 | invalidChar = new UTF8Char(new byte[]{(byte) 0b10000000});
959 | }
960 | return invalidChar;
961 | }
962 |
963 | public boolean isInvalid() {
964 | return bytes.length == 1 && bytes[0] == (byte) 0b10000000;
965 | }
966 | }
967 |
968 | private static class Trie01 {
969 |
970 | private Trie01[] chldrn = null;
971 |
972 | private T value = null;
973 |
974 | public void add(String s, T value) {
975 | privateAdd(s, value, 0);
976 | }
977 |
978 | @SuppressWarnings("unchecked")
979 | private void privateAdd(String s, T value2, int i) {
980 | if (i < s.length()) {
981 | char charati = s.charAt(i);
982 | int index = charati == '0' ? 0 : charati == '1' ? 1 : -1;
983 | if (chldrn == null) chldrn = (Trie01[]) new Trie01[2];
984 | if (chldrn[index] == null) chldrn[index] = new Trie01<>();
985 | chldrn[index].privateAdd(s, value2, i + 1);
986 | } else {
987 | value = value2;
988 | }
989 | }
990 |
991 | public Scanner scan(char c) {
992 | Scanner sc = new Scanner(this);
993 | sc.scan(c);
994 | return sc;
995 | }
996 |
997 | private static class Scanner {
998 |
999 | private Trie01 curNode;
1000 |
1001 | private Scanner(Trie01 start) {
1002 | curNode = start;
1003 | }
1004 |
1005 | public boolean hasValue() {
1006 | return curNode.chldrn == null;
1007 | }
1008 |
1009 | public void scan(char c) {
1010 | curNode = curNode.chldrn[c == '0' ? 0 : c == '1' ? 1 : -1];
1011 | }
1012 |
1013 | public T getValue() {
1014 | return curNode.value;
1015 | }
1016 |
1017 | }
1018 |
1019 | }
1020 |
1021 | }
--------------------------------------------------------------------------------