├── .gitignore ├── .ocamlformat ├── AUTHORS.md ├── CHANGES.md ├── Dockerfile ├── LICENSE.md ├── README.md ├── bench ├── bench.ml ├── bs.c ├── densld.ml ├── dune ├── lzld.ml ├── mclock.c ├── run.ml └── zpipe.c ├── bin ├── decompress.ml └── dune ├── decompress.opam ├── dune-project ├── fuzz ├── dune ├── fuzz.ml ├── fuzz_lzo.ml └── fuzz_ns.ml ├── lib ├── de.ml ├── de.mli ├── dune ├── gz.ml ├── gz.mli ├── lz.ml ├── lz.mli ├── lzo.ml ├── lzo.mli ├── zl.ml └── zl.mli ├── rfc1951.opam └── test ├── bin ├── dune ├── simple.t └── zpipe.c ├── corpus ├── bib ├── book1 ├── book2 ├── geo ├── news ├── obj1 ├── obj2 ├── paper1 ├── paper2 ├── pic ├── progc ├── progl ├── progp ├── rfc5322.txt └── trans ├── dune ├── minilzo-2.10 ├── COPYING ├── Makefile ├── README.LZO ├── dune ├── lzoconf.h ├── lzodefs.h ├── minilzo.c ├── minilzo.h ├── minilzo.ml ├── stubs.c └── testmini.c ├── test.ml ├── test_deflate.ml ├── test_lzo.ml └── test_ns.ml /.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | setup.data 3 | setup.log 4 | doc/*.html 5 | *.native 6 | *.byte 7 | *.so 8 | lib/decompress_conf.ml 9 | *.tar.gz 10 | _tests 11 | lib_test/files 12 | zpipe 13 | c/dpipe 14 | *.merlin 15 | *.install -------------------------------------------------------------------------------- /.ocamlformat: -------------------------------------------------------------------------------- 1 | version=0.22.4 2 | profile=conventional 3 | break-struct=natural 4 | break-infix=fit-or-vertical 5 | break-sequences=false 6 | break-collection-expressions=wrap 7 | break-separators=before 8 | exp-grouping=preserve 9 | parens-tuple=multi-line-only 10 | space-around-lists=false 11 | space-around-records=false 12 | space-around-arrays=false 13 | break-fun-decl=smart 14 | cases-exp-indent=2 15 | sequence-style=before 16 | field-space=tight 17 | break-before-in=auto 18 | -------------------------------------------------------------------------------- /AUTHORS.md: -------------------------------------------------------------------------------- 1 | * [Romain Calascibetta](http://din.osau.re/), main developer. 2 | * Jeremy Yallop, optimization. 3 | * Anil Madhavapeddy, docker file. 4 | * Thomas Gazagnaire, lz77 algorithm. 5 | * Charles-Edouard Lecat, gzip implementation. 6 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | ### v1.5.3 2023-09-19 Paris (France) 2 | 3 | - Delete `libdecompress.a` (@dinosaure, #152) 4 | 5 | This artifact is unused and complexify the distribution. We decided to delete 6 | it. If people want to use `libdecompress.a`, we can provide an other package 7 | which will implement the `ctypes` ceremony to safely produce a 8 | `libdecompress.a`. 9 | 10 | - Lint dependencies (lower bounds) (@dinosaure, #153) 11 | - Remove unsafe accesses into our `decompress.lzo` implementation 12 | (@dinosaure, #154) 13 | - Improve and fix our `decompress.lzo` implementation 14 | (@dinosaure, #155, #158, #159) 15 | 16 | ### v1.5.2 2023-01-23 Essaouira (Maroc) 17 | 18 | - Remove remaining `bigarray-compat` dependencies (@copy, #147) 19 | 20 | ### v1.5.1 2022-08-30 Paris (France) 21 | 22 | - Fix the stream of gzip inflation. If the user wants to know how many bytes 23 | are available into the output buffer, he/she must be in the DEFLATE internal 24 | state. Otherwise, we raise an exception. However, such information is not 25 | available into the API so we decided to say that the full output buffer is 26 | free when we are into the GZip header state. 27 | 28 | It ensures a real full stream API. (@dinosaure, #144) 29 | 30 | ### v1.5.0 2022-08-28 Paris (France) 31 | 32 | - Update with `ocamlformat.0.20.0` (@dinosaure, #133) 33 | - Add `lzo` into the binary (@dinosaure, #140) 34 | - Be able to deflate/inflate files (@dinosaure, #141) 35 | - Implement the zero-compression into zlib/gzip (@dinosaure, #142) 36 | **breaking change**: The behavior of flat block changed. The user does not 37 | specify how many bytes he/she wants to give. He/She can just specify that 38 | he/she wants a `Flat` block. The `Flat` block will contains what the queue 39 | has and nothing more: if the queue has 4 elements, we will encode a `Flat` 40 | block with 4 elements, if the queue has more than 65535 elements, we will 41 | encore only 65535 elements into the `Flat` block. 42 | 43 | The user is not able to _stream_ a `Flat` block: we can not fill one `Flat` 44 | block with multiple `Await`. This behavior is specific to the `Flat` block, 45 | the rest (`Fixed` and `Dynamic` blocks) did not change. 46 | 47 | For higher level API, the `level = 0` informs the _deflator_ to copy as is 48 | input - no compression was done for `zlib` and `gzip` and we emit only `Flat` 49 | blocks. By default, the `level = 4` is given so you probably will not notice 50 | anything but be care that we shifted compression level and `0` becomes a 51 | level without compression. 52 | 53 | ### v1.4.3 2022-04-08 Paris (France) 54 | 55 | - Replace deprecated function of `fmt` (@dinosaure, #135) 56 | - Remove `bigarray-compat`, `stdlib-shims` and support only 57 | OCaml >= 4.08.0 (@hannesm, #138) 58 | - Update to `cmdliner.1.1.0` (@hannesm, #138) 59 | 60 | ### v1.4.2 2021-08-02 Paris (France) 61 | 62 | - Fix lower bounds of `cmdliner` (@kit-ty-kate, #130) 63 | - Fix big-endian support (@dinosaure, @talex5, #131) 64 | 65 | ### v1.4.1 2021-05-11 Paris (France) 66 | 67 | - Fix and of file and end of block _op-code_ (@dinosaure, #123) 68 | **breaking changes** 69 | Semantically, the module `De` has another behavior about the inflation. 70 | Previously, the _stream_ inflation was smart enough to recognize the end 71 | of the stream and the user did not need to really emit: 72 | `De.Inf.src decoder empty 0 0` to say the end of the stream. Now, such 73 | call is required to notice to `De.Inf` the end of the stream. By this 74 | way, we are able to terminate the inflation correctly and we still 75 | continue to raise an error for unterminated stream 76 | (see `tests/invalid_distance_code`). 77 | 78 | For `Zl`/`Gz` users, this update does not imply anything when these 79 | implementations take care about such detail. Only `De` users should update 80 | their code to really emit the end of the stream with 81 | `De.Inf.src decoder empty 0 0`. In the PR, the diff show how to upgrade such 82 | code. 83 | - Upgrade `decompress` to `optint.0.1.0` (@samoht, @dinosaure, #124) 84 | - Fix compilation of benchmarks (@dinosaure, #128) 85 | - Fix out of bounds errors on the _non-stream_ implementation (@clecat, 86 | @ewanmellor, @dinosaure, #126, #127) 87 | - Optimize `memcpy` used on the _non-stream_ implementation (@clecat, 88 | @dinosaure, #129) 89 | 90 | ### v1.4.0 2021-04-22 Paris (France) 91 | 92 | - Add a well-know limitation about the encoding on the documentation, the 93 | output buffer must be upper than 2 bytes in any cases. 94 | (@dinosaure, #114) 95 | - Improve the documentation 96 | (@dinosaure, @brendanlong, #115) 97 | - **breaking changes**, the type of the window used to deflate according 98 | RFC 1951 was updated to `De.Lz77.window` 99 | (@dinosaure, #116 & #115) 100 | - Fix a bug when we want to add the EOB _op-code_ into the queue. The deflation 101 | takes care about that. Note that the queue must be larger than 2. 102 | (@dinosaure, @kluvin, #117) 103 | - Improve the documentation a bit 104 | (@mseri, @dinosaure, #119) 105 | - Fix the use of `optint` and how we handle large files with `Gz`. Fix an error 106 | when we encode the `isize` into the deflated stream. 107 | (@dinosaure, @igarnier, #121 & #120) 108 | - Add a _non-stream_ implementation (@clecat, @dinosaure, #102 & #92) 109 | 110 | The non-blocking stream API has a cost to maintain a _state_ across _syscall_ 111 | such as `read` and `write`. It's useful when we want to plug `decompress` 112 | behind something like a `socket` and care about memory-consumption but it has 113 | a big cost when we want to compress/decompress an object saved into one and 114 | unique buffer. 115 | 116 | The _non-stream_ API gives an opportunity to inflate/deflate one and unique 117 | buffer without the usual plumbing required by the non-blocking stream API. 118 | However, we are limited to compute only objects which can fit into a 119 | `bigarray`. 120 | 121 | About performance, the non-stream API is better than the non-blocking stream 122 | API. See the PR for more details about performances. On the 123 | `book2` (from the Calgary corpus) file: 124 | - `decompress` (stream): 125 | 15 Mb/s (deflation), 76 Mb/s (inflation), ratio: 42.46 % 126 | - `decompress` (non-stream): 127 | 17 Mb/s (deflation), 105 Mb/s (inflation), ratio: 34.66 % 128 | 129 | Even if we checked the implementation with our tests (we ran `ocaml-git` and 130 | `irmin` with this path), the implementation is young and we probably miss 131 | some details/bugs. So we advise the user to compare, at least, the non-stream 132 | implementation with the non-blocking stream implementation if something is 133 | wrong. 134 | 135 | ### v1.3.0 2020-03-03 Paris (France) 136 | 137 | - Add a little executable to benchmark inflation into the distribution 138 | (@dinosaure, #93) 139 | - Add instructions for running benchmark (@gs0510, #94) 140 | - Clarify the description (@XVilka, #96) 141 | - Improve the benchmark and outputs (@dinosaure, @gs0510, #95) 142 | - Avoid allocation of distance table (@Engil, @dinosaure, #97) 143 | - Swapping from arithmetic to logical bitshifts on `d.hold` (@clecat, #99) 144 | - Make the use of all `Higher.compress` arguments (@vect0r-vicall, #103) 145 | - Apply ocamlformat.0.16.0 (@dinosaure, #105, #107) 146 | - Improve Lz77 algorithms (@dinosaure, #108) 147 | **breaking changes** the deflation expects a new window: `De.Lz77.make_window` 148 | instead of `De.make_window` (which is twice larger to improve the compression 149 | algorithm) 150 | 151 | Depending on the level and your corpus, we did not observe performance 152 | regression on deflation (and #97 improves a lot performances). An higher level 153 | is slower (but the compression ratio is better). We advise, by default, to use 154 | the level 6. 155 | 156 | Note that the user is able to make its own compression algorithm according to 157 | his corpus. An example of such implementation is available on the new 158 | `decompress.lz` libraries which fills a queue and compress the input. 159 | 160 | **breaking changes** decompress expects a level between 0 and 9 (inclusive) 161 | (instead of 0 and 3). 162 | - Add tests about level compression (@dinosaure, #109) 163 | - Add level on GZip layer (@dinosaure, #110) 164 | - Provide a `ctypes` reverse binding (@dinosaure, #98) 165 | - Provide a binary `decompress.pipe` which can compress/uncompress with 166 | deflate, zlib or gzip format. 167 | 168 | ### v1.2.0 2020-07-07 Paris (France) 169 | 170 | - add LZO support (@dinosaure, @cfcs, @XVilka, #82) 171 | - update binaries (@dinosaure, @XVilka, #89) 172 | - fix an exception leak (@dinosaure, @b1gtang, #88) 173 | - update README.md (@dinosaure, @XVilka, #87) 174 | - fix a mis-use of `Zl` API (@dinosaure, #85) 175 | - add `dune` as a dependency of `rfc1951` (@kit-ty-kate) 176 | - real non-blocking state with `Zl` (@dinosaure, #84) 177 | 178 | ### v1.1.0 2019-03-10 Paris (France) 179 | 180 | - add GZip support (@dinosaure, @copy, @hcarty, #79) 181 | - **breaking changes**, `Higher` returns a `result` value instead to raise an 182 | exception (@dinosaure, @copy, #80) 183 | - protect Zlib decoder on multiple _no-op_ calls of `decode` 184 | - test when we generate an empty zlib flow 185 | - fix a bug on the DEFLATE layer when we must flush bits to avoid integer 186 | overflow 187 | - really use the internal continuation of the Zlib state 188 | - delete `fmt` depedency 189 | - update fuzzer 190 | - fix default level on `Zl.Higher` 191 | 192 | ### v1.0.0 2019-08-30 Paris (France) 193 | 194 | ** breaking changes ** 195 | 196 | `decompress.1.0.0` is 3 times faster about decompression than before. A huge 197 | [amount of work was done](https://tarides.com/blog/2019-08-26-decompress-the-new-decompress-api.html) 198 | to improve performance and coverage. 199 | 200 | The main reason to update the API is to fix a bad design decision regarding 201 | split compression and encoding. User is able to implement a new compression 202 | algorithm and use it. 203 | 204 | Release comes with regressions: 205 | - `decompress` only supports `Bigarray` now, not `Bytes` 206 | - GZIP layer does not exist anymore 207 | - state of RFC1951 encoder/decoder is not referentially transparent anymore 208 | 209 | Of course, v1.0.0 comes with fixes and improvements: 210 | - `decompress` is able to compress/uncompress 211 | [Calgary corpus](https://en.wikipedia.org/wiki/Calgary_corpus) 212 | - tests are improved and they include all coverage tests from `zlib` 213 | - compression algorithm has a fuzzer 214 | - encoder has a fuzzer 215 | - performance about decoder is 3 times better than `decompress.v0.9.0` and 3 216 | times slower than `zlib` 217 | 218 | `decompress` is split into 2 main modules: 219 | - `dd` which implements RFC1951 220 | - `zz` which implements ZLIB 221 | 222 | API of them are pretty-close to what `decompress.v0.9.0` does with some 223 | advantages on `dd`: 224 | - User can use their own compression algorithm instead of `Dd.L` 225 | - encoder exposes more granular control over what it emits (which block, when, 226 | where) 227 | - Huffman tree generation is out of `dd` 228 | 229 | As a response to #25, `dd` provides a _higher_ level API resembling `camlzip`. 230 | 231 | ### v0.9.0 2019-07-10 Paris (France) 232 | 233 | * Add support of 4.07 and 4.08 in Travis (@XVilka, @dinosaure, #70, #71) 234 | * Use `mmap` (@XVilka, @dinosaure, @hannesm, #68, #69, #71) 235 | * Update documentation (@yurug, @dinosaure, #65, #66) 236 | * Micro-optimization about specialization (@dinosaure, #64) 237 | * Re-organize internals of `decompress` (@dinosaure, #63) 238 | * GZIP support (@clecat, review by @dinosaure, @cfcs, @hannesm, #60) 239 | - fix #58 (@dinosaure) 240 | 241 | ### v0.8.1 2018-10-16 Paris (France) 242 | 243 | * _Dunify_ project (@dinosaure) 244 | * *breaking-change* Unbox `Bytes.t` and `Bigstring.t` as I/O buffer (@dinosaure) 245 | * Add foreign tests vectors (@cfcs, @dinosaure) 246 | * Catch invalid distance (@XVilka, @dinosaure) 247 | * Better check on dictionaries (@XVilka, @dinosaure) 248 | * **breaking-change** Add [wbits] argument to check Window size on RFC1951 249 | (@XVilka, @dinosaure) 250 | 251 | ### v0.8 2018-07-09 Paris (France) 252 | 253 | * Implementation of RFC1951 (task from @cfcs) 254 | * *breaking change* New interface of decompress 255 | 256 | We wrap API in `Zlib_{inflate/deflate}` and add `RFC1951_{inflate/deflate}`. 257 | 258 | * Move to `jbuilder`/`dune` (task from @samoht) 259 | * Better check on `zlib` header 260 | * Fixed infinite loop (task fron @cfcs) 261 | 262 | See 2e3af68, `decompress` has an infinite loop when the inflated dictionary 263 | does not provide any bindings (and length of opcode is <= 0). In this case, 264 | `decompress` expects an empty input and provide an empty output in any case. 265 | 266 | * Use re.1.7.2 on tests 267 | * Use camlzip.1.07 on tests 268 | 269 | ### v0.7 2017-10-18 Paris (France) 270 | 271 | * Fixed Inflate.write function 272 | * Fixed internal state to stick in a internal final state 273 | * Fixed compilation with js_of_ocaml (use trampoline function to avoid 274 | stack-overflow) 275 | * Fixed clash of name about state variable in the Inflate module 276 | * Add afl program 277 | * Export Adler-32 modules 278 | * Add -i and -o option in the dpipe binary to inform the size of the 279 | internal chunk 280 | * Change the value of -mode in the dpipe binary 281 | 282 | ### v0.6 2017-05-11 Cao Lãnh (Vietnam) 283 | 284 | - Fixed bug #29 285 | - Produce far pattern (Lz77 compression) 286 | - Optimize memory consumption of the Inflate module 287 | - Move repository from oklm-wsh to mirage 288 | - Learn topkg release 289 | 290 | ### v0.5 2017-02-17 Essaouira (Maroc) 291 | 292 | - Stabilize the interface (@dbuenzli's interface) 293 | - Merge optimization from @yallop 294 | - Add `sync_flush`, `partial_flush`, `full_flush` (experimental) 295 | - Move the build system to `topkg` 296 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ocaml/opam:alpine_ocaml-4.03.0 2 | COPY . /home/opam/src 3 | RUN sudo chown -R opam /home/opam/src 4 | RUN opam pin add -n decompress /home/opam/src 5 | RUN opam depext -uivj 3 decompress 6 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2019 Romain Calascibetta 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Decompress - Pure OCaml implementation of decompression algorithms 2 | 3 | `decompress` is a library which implements: 4 | - [RFC1951](https://tools.ietf.org/html/rfc1951) 5 | - [Zlib](https://zlib.net/) 6 | - [Gzip](https://tools.ietf.org/html/rfc1952) 7 | - [LZO](https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Oberhumer) 8 | 9 | ## The library 10 | 11 | The library is available with: 12 | ``` 13 | $ opam install decompress 14 | ``` 15 | 16 | It provides three sub-packages: 17 | - `decompress.de` to handle RFC1951 stream 18 | - `decompress.zl` to handle Zlib stream 19 | - `decompress.gz` to handle Gzip stream 20 | - `decompress.lzo` to handle LZO contents 21 | 22 | Each sub-package provide 3 sub-modules: 23 | - `Inf` to inflate/decompress a stream 24 | - `Def` to deflate/compress a stream 25 | - `Higher` as a easy entry point to use the stream 26 | 27 | ## How to use it 28 | 29 | ### The binary 30 | 31 | The distribution provides a simple binary which is able to compress/uncompress 32 | anything: 33 | ```sh 34 | $ decompress -fgzip --deflate < my_document.txt > my_document.gzip 35 | $ decompress -fgzip < my_document.gzip > my_document.out 36 | $ diff my_document.txt my_document.out 37 | ``` 38 | 39 | It does the GZip compression, the Zlib one and the DEFLATE one. It can do an 40 | LZO compression too. 41 | 42 | ### Link issue 43 | 44 | `decompress` uses [`checkseum`][checkseum] to compute CRC of streams. 45 | `checkseum` provides 2 implementations: 46 | - a C implementation to be fast 47 | - an OCaml implementation to be usable with `js_of_ocaml` (or, at least, 48 | require only the _caml runtime_) 49 | 50 | When the user wants to make an OCaml executable, it must choose which 51 | implementation of `checkseum` he wants. A compilation of an executable with 52 | `decompress.zl` is: 53 | ``` 54 | $ ocamlfind opt -linkpkg -package checkseum.c,decompress.zl main.ml 55 | ``` 56 | 57 | Otherwise, the end-user should have a linking error (see 58 | [#47](https://github.com/mirage/decompress/issues/47)). 59 | 60 | #### With `dune` 61 | 62 | `checkseum` uses a mechanism integrated into `dune` which solves the link 63 | issue. It provides a way to silently choose the default implementation of 64 | `checkseum`: `checkseum.c`. 65 | 66 | By this way (and only with `dune`), an executable with `decompress.zl` is: 67 | ``` 68 | (executable 69 | (name main) 70 | (libraries decompress.zl)) 71 | ``` 72 | 73 | Of course, the user still is able to choose which implementation he wants: 74 | ``` 75 | (executable 76 | (name main) 77 | (libraries checkseum.ocaml decompress.zl)) 78 | ``` 79 | 80 | ### The API 81 | 82 | `decompress` proposes to the user a full control of: 83 | - the input/output loop 84 | - the allocation 85 | 86 | #### Input / Output 87 | 88 | The process of the inflation/deflation is non-blocking and it does not require 89 | any _syscalls_ (as an usual MirageOS project). The user can decide how to get 90 | the input and how to store the output. 91 | 92 | An usual _loop_ (which can fit into `lwt` or `async`) of `decompress.zl` is: 93 | ```ocaml 94 | let rec go decoder = match Zl.Inf.decode decoder with 95 | | `Await decoder -> 96 | let len = input itmp 0 (Bigstringaf.length tmp) in 97 | go (Zl.Inf.src decoder itmp 0 len) 98 | | `Flush decoder -> 99 | let len = Bigstringaf.length otmp - Zl.Inf.dst_rem decoder in 100 | output stdout otmp 0 len ; 101 | go (Zl.Inf.flush decoder) 102 | | `Malformed err -> invalid_arg err 103 | | `End decoder -> 104 | let len = Bigstringaf.length otmp - Zl.Inf.dst_rem decoder in 105 | output stdout otmp 0 len in 106 | go decoder 107 | ``` 108 | 109 | #### Allocation 110 | 111 | Then, the process does not allocate large objects but it requires at the 112 | initialisation these objects. Such objects can be re-used by another 113 | inflation/deflation process - of course, these processes can not use same 114 | objects at the same time. 115 | 116 | ```ocaml 117 | val decompress : window:De.window -> in_channel -> out_channel -> unit 118 | 119 | let w0 = De.make_windows ~bits:15 120 | 121 | (* Safe use of decompress *) 122 | let () = 123 | decompress ~window:w0 stdin stdout ; 124 | decompress ~window:w0 (open_in "file.z") (open_out "file") 125 | 126 | (* Unsafe use of decompress, 127 | the second process must use an other pre-allocated window. *) 128 | let () = 129 | Lwt_main.run @@ 130 | Lwt.join [ (decompress ~window:w0 stdin stdout |> Lwt.return) 131 | ; (decompress ~window:w0 (open_in "file.z") (open_out "file") 132 | |> Lwt.return) ] 133 | ``` 134 | 135 | This ability can be used on: 136 | - the input buffer given to the encoder/decoder with `src` 137 | - the output buffer given to the encoder/decoder 138 | - the window given to the encoder/decoder 139 | - the shared-queue used by the compression algorithm and the encoder 140 | 141 | ### Example 142 | 143 | An example exists into [bin/decompress.ml][decompress.ml] where you can see how 144 | to use `decompress.zl` and `decompress.de`. 145 | 146 | ### Higher interface 147 | 148 | However, `decompress` provides a _higher_ interface close to what `camlzip` 149 | provides to help newcomers to use `decompress`: 150 | ```ocaml 151 | val compress : 152 | refill:(bigstring -> int) 153 | -> flush:(bigstring -> int -> unit) 154 | -> unit 155 | val uncompress : 156 | refill:(bigstring -> int) 157 | -> flush:(bigstring -> int -> unit) 158 | -> unit 159 | ``` 160 | 161 | ### Benchmark 162 | 163 | `decompress` has a benchmark about _inflation_ to see if any update has a 164 | performance implication. The process try to _inflate_ a stream and stop at N 165 | second(s) (default is 30), The benchmark requires `libzlib-dev`, `cmdliner` and 166 | `bos` to be able to compile `zpipe` and the executable to produce the CSV file. 167 | To build the benchmark: 168 | 169 | ```sh 170 | $ dune build --profile benchmark bench/output.csv 171 | ``` 172 | 173 | On linux machines, `/dev/urandom` will generate the random input for piping to 174 | zpipe. To run the benchmark: 175 | ```sh 176 | $ cat /dev/urandom | ./_build/default/bench/zpipe \ 177 | | ./_build/default/bench/bench.exe 2> /dev/null 178 | ``` 179 | 180 | The output file is a CSV file which can be processed by a _plot_ software. It 181 | records input bytes, output bytes and memory usage at each second. You can 182 | show results with `gnuplot`: 183 | ```sh 184 | $ gnuplot -p -e \ 185 | 'set datafile separator ","; 186 | set key autotitle columnhead; 187 | plot "_build/default/bench/output.csv" using 1:2 with lines, 188 | "" using 1:3 with lines' 189 | $ gnuplot -p -e \ 190 | 'set datafile separator ","; 191 | set key autotitle columnhead; 192 | plot "_build/default/bench/output.csv" using 1:4 with lines' 193 | ``` 194 | 195 | The second graph ensure that the inflation does not allocate while it 196 | processes. It ensure that, at another layer, `decompress` does not leak 197 | memory. 198 | 199 | ## Build Requirements 200 | 201 | * OCaml >= 4.07.0 202 | * `dune` to build the project 203 | * `base-bytes` meta-package 204 | * `checkseum` 205 | * `optint` 206 | 207 | [checkseum]: https://github.com/mirage/checkseum 208 | [decompress.ml]: ./bin/decompress.ml 209 | -------------------------------------------------------------------------------- /bench/bench.ml: -------------------------------------------------------------------------------- 1 | external now : unit -> (int64[@unboxed]) 2 | = "clock_linux_get_time_bytecode" "clock_linux_get_time_native" 3 | [@@noalloc] 4 | 5 | external read : Unix.file_descr -> De.bigstring -> int -> int -> int = "bs_read" 6 | [@@noalloc] 7 | 8 | external write : Unix.file_descr -> De.bigstring -> int -> int -> int 9 | = "bs_write" 10 | [@@noalloc] 11 | 12 | let w = De.make_window ~bits:15 13 | let o = De.bigstring_create De.io_buffer_size 14 | let i = De.bigstring_create De.io_buffer_size 15 | let q = De.Queue.create 16384 16 | let allocate _ = w 17 | 18 | let rec fully_write fd buf off len = 19 | let len' = write fd buf off len in 20 | if len' < len then fully_write fd buf (off + len') (len - len') 21 | 22 | exception Stop 23 | 24 | let plot metrics max = 25 | Fmt.pr "time,in,out,live\n%!" 26 | ; for i = 0 to max - 1 do 27 | Fmt.pr "%d," (succ i) 28 | ; for j = 0 to 2 do 29 | Fmt.pr "%d" metrics.((i * 3) + j) 30 | ; if j < 2 then Fmt.pr "," 31 | done 32 | ; Fmt.pr "\n%!" 33 | done 34 | 35 | type result = { 36 | time: int 37 | ; in_bytes_per_sec: int 38 | ; out_bytes_per_sec: int 39 | ; live_heap_bytes: int 40 | } 41 | [@@deriving to_yojson] 42 | 43 | type results = {metrics: result list} [@@deriving to_yojson] 44 | 45 | let transform_arr_to_results metrics max = 46 | let indices = List.init max (fun x -> x) in 47 | let metrics = 48 | List.map 49 | (fun index -> 50 | let time = index + 1 in 51 | let in_bytes_per_sec = metrics.((index * 3) + 0) in 52 | let out_bytes_per_sec = metrics.((index * 3) + 1) in 53 | let live_heap_bytes = metrics.((index * 3) + 2) in 54 | {time; in_bytes_per_sec; out_bytes_per_sec; live_heap_bytes}) 55 | indices in 56 | {metrics} 57 | 58 | let plot_json metrics max = 59 | let metrics = transform_arr_to_results metrics max in 60 | let fmt = stdout |> Format.formatter_of_out_channel in 61 | let open Yojson.Safe in 62 | let obj = `Assoc ["results", results_to_yojson metrics] in 63 | pretty_print fmt obj 64 | 65 | let inflate max flag = 66 | let open Zl in 67 | let decoder = Inf.decoder `Manual ~o ~allocate in 68 | let metrics = Array.make (max * 3) 0 (* in bytes, out bytes, live words *) in 69 | let rec go idx ts decoder = 70 | let idx, ts = 71 | if Int64.sub (now ()) ts >= 1_000_000_000L then ( 72 | let {Gc.top_heap_words; _} = Gc.quick_stat () in 73 | metrics.((idx * 3) + 2) <- top_heap_words 74 | ; if succ idx >= max then raise Stop 75 | ; succ idx, Int64.add 1_000_000_000L ts) 76 | else idx, ts in 77 | match Inf.decode decoder with 78 | | `Await decoder -> 79 | let len = read Unix.stdin i 0 De.io_buffer_size in 80 | metrics.((idx * 3) + 0) <- metrics.((idx * 3) + 0) + len 81 | ; go idx ts (Inf.src decoder i 0 len) 82 | | `Flush decoder -> 83 | let len = De.io_buffer_size - Inf.dst_rem decoder in 84 | fully_write Unix.stderr o 0 len 85 | ; metrics.((idx * 3) + 1) <- metrics.((idx * 3) + 1) + len 86 | ; go idx ts (Inf.flush decoder) 87 | | `Malformed err -> invalid_arg err 88 | | `End _ -> 89 | let len = De.io_buffer_size - Inf.dst_rem decoder in 90 | if len > 0 then fully_write Unix.stderr o 0 len 91 | ; metrics.((idx * 3) + 1) <- metrics.((idx * 3) + 1) + len in 92 | (try go 0 (now ()) decoder with Stop -> ()) 93 | ; match flag with true -> plot_json metrics max | _ -> plot metrics max 94 | 95 | open Cmdliner 96 | 97 | let max = 98 | let doc = "Maximum time count in seconds" in 99 | Arg.(value & opt int 30 & info ["m"; "max"] ~doc) 100 | 101 | let json = 102 | let doc = "Print json output." in 103 | Arg.(value & flag & info ["j"; "json"] ~doc) 104 | 105 | let cmd = 106 | Cmd.v 107 | (Cmd.info "bench" ~doc:"Run benchmarks") 108 | Term.(const inflate $ max $ json) 109 | 110 | let () = exit @@ Cmd.eval cmd 111 | -------------------------------------------------------------------------------- /bench/bs.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | value 5 | bs_write(value fd, value buf, value off, value len) 6 | { 7 | int ret; 8 | 9 | ret = write(Int_val(fd), (char *) Caml_ba_array_val(buf)->data + Long_val(off), 10 | Long_val(len)) ; 11 | 12 | return Val_long(ret); 13 | } 14 | 15 | value 16 | bs_read(value fd, value buf, value off, value len) 17 | { 18 | int ret; 19 | 20 | ret = read(Int_val(fd), (char *) Caml_ba_array_val(buf)->data + Long_val(off), 21 | Long_val(len)); 22 | 23 | return Val_long(ret); 24 | } 25 | -------------------------------------------------------------------------------- /bench/densld.ml: -------------------------------------------------------------------------------- 1 | open Cmdliner 2 | open De_landmarks 3 | 4 | let inflate file d = 5 | let file = open_in file in 6 | let len = in_channel_length file in 7 | let src = Bigstringaf.of_string (really_input_string file len) ~off:0 ~len in 8 | if d then 9 | let dst = Bigstringaf.create (len * 10) in 10 | ignore @@ Inf.Ns.inflate src dst 11 | else 12 | let dst = Bigstringaf.create (Def.Ns.compress_bound len) in 13 | ignore @@ Def.Ns.deflate src dst 14 | 15 | let file = 16 | let doc = "input file" in 17 | Arg.(value & pos 0 string "file" & info [] ~doc) 18 | 19 | let d = 20 | let doc = "decompress the input" in 21 | Arg.(value & flag & info ["d"] ~doc) 22 | 23 | let cmd = 24 | let info = Cmd.info "bench" ~doc:"Run benchmarks for ns implementation" in 25 | Cmd.v info Term.(const inflate $ file $ d) 26 | 27 | let () = Cmd.(exit @@ eval cmd) 28 | -------------------------------------------------------------------------------- /bench/dune: -------------------------------------------------------------------------------- 1 | (executable 2 | (name bench) 3 | (modules bench) 4 | (foreign_stubs 5 | (language c) 6 | (names mclock bs)) 7 | (enabled_if 8 | (= %{profile} benchmark)) 9 | (preprocess 10 | (pps ppx_deriving_yojson)) 11 | (libraries 12 | unix 13 | fmt 14 | decompress.de 15 | decompress.zl 16 | cmdliner 17 | ppx_deriving_yojson.runtime 18 | yojson)) 19 | 20 | (executable 21 | (name run) 22 | (modules run) 23 | (enabled_if 24 | (= %{profile} benchmark)) 25 | (libraries cmdliner fpath rresult bos)) 26 | 27 | (rule 28 | (targets zpipe) 29 | (deps 30 | (:zpipe zpipe.c)) 31 | (action 32 | (run %{cc} %{zpipe} -lz -o %{targets}))) 33 | 34 | (rule 35 | (targets output.csv) 36 | (enabled_if 37 | (= %{profile} benchmark)) 38 | (deps 39 | (:run run.exe) 40 | (:zpipe zpipe) 41 | (:bench bench.exe)) 42 | (action 43 | (chdir 44 | %{workspace_root} 45 | (run %{run} -o %{targets})))) 46 | 47 | (library 48 | (name lz_landmarks) 49 | (modules lz_landmarks) 50 | (enabled_if 51 | (= %{profile} benchmark)) 52 | (libraries checkseum optint landmarks de) 53 | (preprocess 54 | (pps landmarks-ppx --auto))) 55 | 56 | (rule 57 | (copy ../lib/lz.ml lz_landmarks.ml)) 58 | 59 | (rule 60 | (copy ../lib/lz.mli lz_landmarks.mli)) 61 | 62 | (executable 63 | (name lzld) 64 | (modules lzld) 65 | (enabled_if 66 | (= %{profile} benchmark)) 67 | (libraries decompress.de lz_landmarks)) 68 | 69 | (library 70 | (name de_landmarks) 71 | (modules de_landmarks) 72 | (enabled_if 73 | (= %{profile} benchmark)) 74 | (libraries checkseum optint landmarks de) 75 | (flags 76 | (:standard -w -55)) 77 | (preprocess 78 | (pps landmarks-ppx --auto))) 79 | 80 | (rule 81 | (copy ../lib/de.ml de_landmarks.ml)) 82 | 83 | (rule 84 | (copy ../lib/de.mli de_landmarks.mli)) 85 | 86 | (executable 87 | (name densld) 88 | (modules densld) 89 | (enabled_if 90 | (= %{profile} benchmark)) 91 | (libraries cmdliner de_landmarks bigstringaf)) 92 | -------------------------------------------------------------------------------- /bench/lzld.ml: -------------------------------------------------------------------------------- 1 | open Lz_landmarks 2 | 3 | let q = De.Queue.create 0x1000 4 | let w = make_window ~bits:15 5 | 6 | let () = 7 | let state = state ~q ~w (`Channel stdin) in 8 | let rec go state = 9 | match compress state with 10 | | `Flush -> De.Queue.reset q ; go state 11 | | `End -> () 12 | | `Await -> assert false in 13 | go state 14 | -------------------------------------------------------------------------------- /bench/mclock.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #ifndef __unused 12 | #define __unused(x) x __attribute((unused)) 13 | #endif 14 | #define __unit() value __unused(unit) 15 | 16 | uint64_t 17 | clock_linux_get_time_native(__unit ()) 18 | { 19 | struct timespec ts; 20 | 21 | clock_gettime(CLOCK_MONOTONIC, &ts); 22 | 23 | return ((uint64_t) ts.tv_sec 24 | * (uint64_t) 1000000000LL 25 | + (uint64_t) ts.tv_nsec); 26 | } 27 | 28 | CAMLprim value 29 | clock_linux_get_time_bytecode(__unit ()) 30 | { 31 | struct timespec ts; 32 | 33 | clock_gettime(CLOCK_MONOTONIC, &ts); 34 | 35 | return caml_copy_int64((uint64_t) ts.tv_sec 36 | * (uint64_t) 1000000000LL 37 | + (uint64_t) ts.tv_nsec); 38 | } 39 | -------------------------------------------------------------------------------- /bench/run.ml: -------------------------------------------------------------------------------- 1 | open Rresult 2 | open Bos 3 | 4 | let ( <.> ) f g x = f (g x) 5 | 6 | let run max json = 7 | let bench_cmd = 8 | match json with 9 | | true -> Cmd.(v "./bench/bench.exe" % "-m" % string_of_int max % "-j") 10 | | false -> Cmd.(v "./bench/bench.exe" % "-m" % string_of_int max) in 11 | let open OS.Cmd in 12 | (out_run_in <.> run_out) Cmd.(v "cat" % "/dev/urandom") 13 | >>= (out_run_in <.> run_io Cmd.(v "./bench/zpipe")) 14 | >>= (R.ok <.> run_io ~err:err_null bench_cmd) 15 | >>= out_string ~trim:false 16 | 17 | let run output max json = 18 | let oc, close = 19 | match output with 20 | | Some path -> open_out (Fpath.to_string path), true 21 | | None -> stdout, false in 22 | let output, _ = run max json |> R.failwith_error_msg in 23 | output_string oc output 24 | ; if close then close_out oc 25 | 26 | open Cmdliner 27 | 28 | let path = 29 | let parser = Fpath.of_string in 30 | let pp = Fpath.pp in 31 | Arg.conv (parser, pp) 32 | 33 | let output = Arg.(value & opt (some path) None & info ["o"]) 34 | let max = Arg.(value & pos ~rev:true 0 int 30 & info []) 35 | let json = Arg.(value & flag & info ["j"; "json"]) 36 | let cmd = Cmd.v (Cmd.info "run") Term.(const run $ output $ max $ json) 37 | let () = exit @@ Cmd.eval cmd 38 | -------------------------------------------------------------------------------- /bench/zpipe.c: -------------------------------------------------------------------------------- 1 | /* zpipe.c: example of proper use of zlib's inflate() and deflate() 2 | Not copyrighted -- provided to the public domain 3 | Version 1.4 11 December 2005 Mark Adler */ 4 | 5 | /* Version history: 6 | 1.0 30 Oct 2004 First version 7 | 1.1 8 Nov 2004 Add void casting for unused return values 8 | Use switch statement for inflate() return values 9 | 1.2 9 Nov 2004 Add assertions to document zlib guarantees 10 | 1.3 6 Apr 2005 Remove incorrect assertion in inf() 11 | 1.4 11 Dec 2005 Add hack to avoid MSDOS end-of-line conversions 12 | Avoid some compiler warnings for input and output buffers 13 | */ 14 | 15 | #include 16 | #include 17 | #include 18 | #include "zlib.h" 19 | 20 | #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) 21 | # include 22 | # include 23 | # define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) 24 | #else 25 | # define SET_BINARY_MODE(file) 26 | #endif 27 | 28 | #define CHUNK 16384 29 | 30 | /* Compress from file source to file dest until EOF on source. 31 | def() returns Z_OK on success, Z_MEM_ERROR if memory could not be 32 | allocated for processing, Z_STREAM_ERROR if an invalid compression 33 | level is supplied, Z_VERSION_ERROR if the version of zlib.h and the 34 | version of the library linked do not match, or Z_ERRNO if there is 35 | an error reading or writing the files. */ 36 | int def(FILE *source, FILE *dest, int level) 37 | { 38 | int ret, flush; 39 | unsigned have; 40 | z_stream strm; 41 | unsigned char in[CHUNK]; 42 | unsigned char out[CHUNK]; 43 | 44 | /* allocate deflate state */ 45 | strm.zalloc = Z_NULL; 46 | strm.zfree = Z_NULL; 47 | strm.opaque = Z_NULL; 48 | ret = deflateInit(&strm, level); 49 | if (ret != Z_OK) 50 | return ret; 51 | 52 | /* compress until end of file */ 53 | do { 54 | strm.avail_in = fread(in, 1, CHUNK, source); 55 | if (ferror(source)) { 56 | (void)deflateEnd(&strm); 57 | return Z_ERRNO; 58 | } 59 | flush = feof(source) ? Z_FINISH : Z_NO_FLUSH; 60 | strm.next_in = in; 61 | 62 | /* run deflate() on input until output buffer not full, finish 63 | compression if all of source has been read in */ 64 | do { 65 | strm.avail_out = CHUNK; 66 | strm.next_out = out; 67 | ret = deflate(&strm, flush); /* no bad return value */ 68 | assert(ret != Z_STREAM_ERROR); /* state not clobbered */ 69 | have = CHUNK - strm.avail_out; 70 | if (fwrite(out, 1, have, dest) != have || ferror(dest)) { 71 | (void)deflateEnd(&strm); 72 | return Z_ERRNO; 73 | } 74 | } while (strm.avail_out == 0); 75 | assert(strm.avail_in == 0); /* all input will be used */ 76 | 77 | /* done when last data in file processed */ 78 | } while (flush != Z_FINISH); 79 | assert(ret == Z_STREAM_END); /* stream will be complete */ 80 | 81 | /* clean up and return */ 82 | (void)deflateEnd(&strm); 83 | return Z_OK; 84 | } 85 | 86 | /* Decompress from file source to file dest until stream ends or EOF. 87 | inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be 88 | allocated for processing, Z_DATA_ERROR if the deflate data is 89 | invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and 90 | the version of the library linked do not match, or Z_ERRNO if there 91 | is an error reading or writing the files. */ 92 | int inf(FILE *source, FILE *dest) 93 | { 94 | int ret; 95 | unsigned have; 96 | z_stream strm; 97 | unsigned char in[CHUNK]; 98 | unsigned char out[CHUNK]; 99 | 100 | /* allocate inflate state */ 101 | strm.zalloc = Z_NULL; 102 | strm.zfree = Z_NULL; 103 | strm.opaque = Z_NULL; 104 | strm.avail_in = 0; 105 | strm.next_in = Z_NULL; 106 | ret = inflateInit(&strm); 107 | if (ret != Z_OK) 108 | return ret; 109 | 110 | /* decompress until deflate stream ends or end of file */ 111 | do { 112 | strm.avail_in = fread(in, 1, CHUNK, source); 113 | if (ferror(source)) { 114 | (void)inflateEnd(&strm); 115 | return Z_ERRNO; 116 | } 117 | if (strm.avail_in == 0) 118 | break; 119 | strm.next_in = in; 120 | 121 | /* run inflate() on input until output buffer not full */ 122 | do { 123 | strm.avail_out = CHUNK; 124 | strm.next_out = out; 125 | ret = inflate(&strm, Z_NO_FLUSH); 126 | assert(ret != Z_STREAM_ERROR); /* state not clobbered */ 127 | switch (ret) { 128 | case Z_NEED_DICT: 129 | ret = Z_DATA_ERROR; /* and fall through */ 130 | case Z_DATA_ERROR: 131 | case Z_MEM_ERROR: 132 | (void)inflateEnd(&strm); 133 | return ret; 134 | } 135 | have = CHUNK - strm.avail_out; 136 | if (fwrite(out, 1, have, dest) != have || ferror(dest)) { 137 | (void)inflateEnd(&strm); 138 | return Z_ERRNO; 139 | } 140 | } while (strm.avail_out == 0); 141 | 142 | /* done when inflate() says it's done */ 143 | } while (ret != Z_STREAM_END); 144 | 145 | /* clean up and return */ 146 | (void)inflateEnd(&strm); 147 | return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR; 148 | } 149 | 150 | /* report a zlib or i/o error */ 151 | void zerr(int ret) 152 | { 153 | fputs("zpipe: ", stderr); 154 | switch (ret) { 155 | case Z_ERRNO: 156 | if (ferror(stdin)) 157 | fputs("error reading stdin\n", stderr); 158 | if (ferror(stdout)) 159 | fputs("error writing stdout\n", stderr); 160 | break; 161 | case Z_STREAM_ERROR: 162 | fputs("invalid compression level\n", stderr); 163 | break; 164 | case Z_DATA_ERROR: 165 | fputs("invalid or incomplete deflate data\n", stderr); 166 | break; 167 | case Z_MEM_ERROR: 168 | fputs("out of memory\n", stderr); 169 | break; 170 | case Z_VERSION_ERROR: 171 | fputs("zlib version mismatch!\n", stderr); 172 | } 173 | } 174 | 175 | /* compress or decompress from stdin to stdout */ 176 | int main(int argc, char **argv) 177 | { 178 | int ret; 179 | 180 | /* avoid end-of-line conversions */ 181 | SET_BINARY_MODE(stdin); 182 | SET_BINARY_MODE(stdout); 183 | 184 | /* do compression if no arguments */ 185 | if (argc == 1) { 186 | ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION); 187 | if (ret != Z_OK) 188 | zerr(ret); 189 | return ret; 190 | } 191 | 192 | /* do decompression if -d specified */ 193 | else if (argc == 2 && strcmp(argv[1], "-d") == 0) { 194 | ret = inf(stdin, stdout); 195 | if (ret != Z_OK) 196 | zerr(ret); 197 | return ret; 198 | } 199 | 200 | /* otherwise, report usage */ 201 | else { 202 | fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr); 203 | return 1; 204 | } 205 | } 206 | -------------------------------------------------------------------------------- /bin/decompress.ml: -------------------------------------------------------------------------------- 1 | let w = De.make_window ~bits:15 2 | let l = De.Lz77.make_window ~bits:15 3 | let o = De.bigstring_create De.io_buffer_size 4 | let i = De.bigstring_create De.io_buffer_size 5 | let q = De.Queue.create 4096 6 | let str fmt = Format.asprintf fmt 7 | let msgf fmt = Format.kasprintf (fun msg -> `Msg msg) fmt 8 | let error_msgf fmt = Format.kasprintf (fun err -> Error (`Msg err)) fmt 9 | 10 | let bigstring_input ic buf off len = 11 | let tmp = Bytes.create len in 12 | try 13 | let len = input ic tmp 0 len in 14 | for i = 0 to len - 1 do 15 | buf.{off + i} <- Bytes.get tmp i 16 | done 17 | ; len 18 | with End_of_file -> 0 19 | 20 | let bigstring_output oc buf off len = 21 | let res = Bytes.create len in 22 | for i = 0 to len - 1 do 23 | Bytes.set res i buf.{off + i} 24 | done 25 | ; output_string oc (Bytes.unsafe_to_string res) 26 | 27 | let run_inflate ic oc = 28 | let open De in 29 | let decoder = Inf.decoder `Manual ~o ~w in 30 | let rec go () = 31 | match Inf.decode decoder with 32 | | `Await -> 33 | let len = bigstring_input ic i 0 io_buffer_size in 34 | Inf.src decoder i 0 len ; go () 35 | | `Flush -> 36 | let len = io_buffer_size - Inf.dst_rem decoder in 37 | bigstring_output oc o 0 len 38 | ; Inf.flush decoder 39 | ; go () 40 | | `Malformed err -> `Error (false, str "%s." err) 41 | | `End -> 42 | let len = io_buffer_size - Inf.dst_rem decoder in 43 | if len > 0 then bigstring_output oc o 0 len 44 | ; `Ok 0 in 45 | go () 46 | 47 | let run_deflate ~level ic oc = 48 | let open De in 49 | let state = Lz77.state ~level ~q ~w:l (`Channel ic) in 50 | let encoder = Def.encoder (`Channel oc) ~q in 51 | 52 | let rec compress () = 53 | match De.Lz77.compress state with 54 | | `Await -> assert false 55 | | `Flush -> 56 | let literals = Lz77.literals state in 57 | let distances = Lz77.distances state in 58 | encode 59 | @@ Def.encode encoder 60 | (`Block 61 | { 62 | Def.kind= 63 | Dynamic (Def.dynamic_of_frequencies ~literals ~distances) 64 | ; last= false 65 | }) 66 | | `End -> 67 | Queue.push_exn q Queue.eob 68 | ; pending @@ Def.encode encoder (`Block {Def.kind= Fixed; last= true}) 69 | and pending = function `Partial | `Block -> assert false | `Ok -> () 70 | and encode = function 71 | | `Partial -> assert false 72 | | `Ok | `Block -> compress () in 73 | Def.dst encoder o 0 io_buffer_size 74 | ; compress () 75 | ; `Ok 0 76 | 77 | let run_zlib_inflate ic oc = 78 | let open Zl in 79 | let allocate bits = De.make_window ~bits in 80 | let decoder = Inf.decoder `Manual ~o ~allocate in 81 | 82 | let rec go decoder = 83 | match Inf.decode decoder with 84 | | `Await decoder -> 85 | let len = bigstring_input ic i 0 De.io_buffer_size in 86 | Inf.src decoder i 0 len |> go 87 | | `Flush decoder -> 88 | let len = De.io_buffer_size - Inf.dst_rem decoder in 89 | bigstring_output oc o 0 len 90 | ; Inf.flush decoder |> go 91 | | `Malformed err -> `Error (false, str "%s." err) 92 | | `End decoder -> 93 | let len = De.io_buffer_size - Inf.dst_rem decoder in 94 | if len > 0 then bigstring_output oc o 0 len 95 | ; `Ok 0 in 96 | go decoder 97 | 98 | let run_zlib_deflate ~level ic oc = 99 | let open Zl in 100 | let encoder = Def.encoder `Manual `Manual ~q ~w:l ~level in 101 | 102 | let rec go encoder = 103 | match Def.encode encoder with 104 | | `Await encoder -> 105 | let len = bigstring_input ic i 0 De.io_buffer_size in 106 | Def.src encoder i 0 len |> go 107 | | `Flush encoder -> 108 | let len = De.io_buffer_size - Def.dst_rem encoder in 109 | bigstring_output oc o 0 len 110 | ; Def.dst encoder o 0 De.io_buffer_size |> go 111 | | `End encoder -> 112 | let len = De.io_buffer_size - Def.dst_rem encoder in 113 | if len > 0 then bigstring_output oc o 0 len 114 | ; `Ok 0 in 115 | Def.dst encoder o 0 De.io_buffer_size |> go 116 | 117 | let run_gzip_inflate ic oc = 118 | let open Gz in 119 | let decoder = Inf.decoder `Manual ~o in 120 | 121 | let rec go decoder = 122 | match Inf.decode decoder with 123 | | `Await decoder -> 124 | let len = bigstring_input ic i 0 io_buffer_size in 125 | Inf.src decoder i 0 len |> go 126 | | `Flush decoder -> 127 | let len = io_buffer_size - Inf.dst_rem decoder in 128 | bigstring_output oc o 0 len 129 | ; Inf.flush decoder |> go 130 | | `Malformed err -> `Error (false, str "%s." err) 131 | | `End decoder -> 132 | let len = io_buffer_size - Inf.dst_rem decoder in 133 | if len > 0 then bigstring_output oc o 0 len 134 | ; `Ok 0 in 135 | go decoder 136 | 137 | let now () = Int32.of_float (Unix.gettimeofday ()) 138 | 139 | let run_gzip_deflate ~level ic oc = 140 | let open Gz in 141 | let encoder = 142 | Def.encoder `Manual `Manual ~q ~w:l ~level ~mtime:(now ()) Gz.Unix in 143 | 144 | let rec go encoder = 145 | match Def.encode encoder with 146 | | `Await encoder -> 147 | let len = bigstring_input ic i 0 io_buffer_size in 148 | Def.src encoder i 0 len |> go 149 | | `Flush encoder -> 150 | let len = io_buffer_size - Def.dst_rem encoder in 151 | bigstring_output oc o 0 len 152 | ; Def.dst encoder o 0 io_buffer_size |> go 153 | | `End encoder -> 154 | let len = io_buffer_size - Def.dst_rem encoder in 155 | if len > 0 then bigstring_output oc o 0 len 156 | ; `Ok 0 in 157 | Def.dst encoder o 0 io_buffer_size |> go 158 | 159 | external string_get_uint32 : string -> int -> int32 = "%caml_string_get32" 160 | 161 | external bigstring_set_uint32 : Lzo.bigstring -> int -> int32 -> unit 162 | = "%caml_bigstring_set32" 163 | 164 | let string_get_uint8 str idx = Char.code (String.get str idx) 165 | 166 | external bigstring_set_uint8 : Lzo.bigstring -> int -> int -> unit 167 | = "%caml_ba_set_1" 168 | 169 | let run_lzo_deflate ic oc = 170 | let wrkmem = Lzo.make_wrkmem () in 171 | let in_contents = 172 | let buf = Buffer.create 0x1000 in 173 | let tmp = Bytes.create 0x100 in 174 | let rec go () = 175 | match input ic tmp 0 (Bytes.length tmp) with 176 | | 0 -> Buffer.contents buf 177 | | len -> 178 | Buffer.add_subbytes buf tmp 0 len 179 | ; go () 180 | | exception End_of_file -> Buffer.contents buf in 181 | go () in 182 | let in_contents = 183 | let len = String.length in_contents in 184 | let res = Bigarray.Array1.create Bigarray.char Bigarray.c_layout len in 185 | let len0 = len land 3 in 186 | let len1 = len asr 2 in 187 | for i = 0 to len1 - 1 do 188 | let i = i * 4 in 189 | let v = string_get_uint32 in_contents i in 190 | bigstring_set_uint32 res i v 191 | done 192 | ; for i = 0 to len0 - 1 do 193 | let i = (len1 * 4) + i in 194 | let v = string_get_uint8 in_contents i in 195 | bigstring_set_uint8 res i v 196 | done 197 | ; res in 198 | let out_contents = 199 | Bigarray.(Array1.create char c_layout (Array1.dim in_contents * 2)) in 200 | match Lzo.compress in_contents out_contents wrkmem with 201 | | len -> 202 | bigstring_output oc out_contents 0 len 203 | ; `Ok 0 204 | | exception Invalid_argument _ -> assert false 205 | 206 | let run_lzo_inflate ic oc = 207 | let in_contents = 208 | let buf = Buffer.create 0x1000 in 209 | let tmp = Bytes.create 0x100 in 210 | let rec go () = 211 | match input ic tmp 0 (Bytes.length tmp) with 212 | | 0 -> Buffer.contents buf 213 | | len -> 214 | Buffer.add_subbytes buf tmp 0 len 215 | ; go () 216 | | exception End_of_file -> Buffer.contents buf in 217 | go () in 218 | let in_contents = 219 | let len = String.length in_contents in 220 | let res = Bigarray.Array1.create Bigarray.char Bigarray.c_layout len in 221 | let len0 = len land 3 in 222 | let len1 = len asr 2 in 223 | for i = 0 to len1 - 1 do 224 | let i = i * 4 in 225 | let v = string_get_uint32 in_contents i in 226 | bigstring_set_uint32 res i v 227 | done 228 | ; for i = 0 to len0 - 1 do 229 | let i = (len1 * 4) + i in 230 | let v = string_get_uint8 in_contents i in 231 | bigstring_set_uint8 res i v 232 | done 233 | ; res in 234 | match Lzo.uncompress_with_buffer in_contents with 235 | | Ok str -> output_string oc str ; `Ok 0 236 | | Error err -> `Error (false, str "%a." Lzo.pp_error err) 237 | 238 | let run deflate format level filename_ic filename_oc = 239 | let ic, close_ic = 240 | match filename_ic with 241 | | Some filename -> 242 | let ic = open_in_bin filename in 243 | ic, fun () -> close_in ic 244 | | None -> stdin, ignore in 245 | let oc, close_oc = 246 | match filename_oc with 247 | | Some filename -> 248 | let oc = open_out_bin filename in 249 | oc, fun () -> close_out oc 250 | | None -> stdout, ignore in 251 | let res = 252 | match deflate, format with 253 | | true, `Deflate -> run_deflate ~level ic oc 254 | | false, `Deflate -> run_inflate ic oc 255 | | true, `Zlib -> run_zlib_deflate ~level ic oc 256 | | false, `Zlib -> run_zlib_inflate ic oc 257 | | true, `Gzip -> run_gzip_deflate ~level ic oc 258 | | false, `Gzip -> run_gzip_inflate ic oc 259 | | true, `Lzo -> run_lzo_deflate ic oc 260 | | false, `Lzo -> run_lzo_inflate ic oc in 261 | close_ic () ; close_oc () ; res 262 | 263 | open Cmdliner 264 | 265 | let deflate = 266 | let doc = "Ask to deflate inputs (instead of inflate)." in 267 | Arg.(value & flag & info ["d"] ~doc) 268 | 269 | let format = 270 | let parser s = 271 | match String.lowercase_ascii s with 272 | | "zlib" -> Ok `Zlib 273 | | "gzip" -> Ok `Gzip 274 | | "deflate" -> Ok `Deflate 275 | | "lzo" -> Ok `Lzo 276 | | x -> error_msgf "Invalid format: %S" x in 277 | let pp ppf = function 278 | | `Zlib -> Format.pp_print_string ppf "zlib" 279 | | `Gzip -> Format.pp_print_string ppf "gzip" 280 | | `Deflate -> Format.pp_print_string ppf "deflate" 281 | | `Lzo -> Format.pp_print_string ppf "lzo" in 282 | let format = Arg.conv (parser, pp) in 283 | Arg.(value & opt format `Deflate & info ["f"; "format"] ~docv:"") 284 | 285 | let input = Arg.(value & pos 0 (some file) None & info [] ~docv:"") 286 | let output = Arg.(value & pos 1 (some string) None & info [] ~docv:"") 287 | 288 | let level = 289 | let parser str = 290 | match int_of_string str with 291 | | n when n >= 0 -> Ok n 292 | | _ -> Error (`Msg "The compression level must be positive") 293 | | exception _ -> Error (`Msg "Invalid level") in 294 | let positive_int = Arg.conv (parser, Format.pp_print_int) in 295 | Arg.(value & opt positive_int 4 & info ["l"; "level"] ~docv:"") 296 | 297 | let command = 298 | let doc = 299 | "A tool to deflate/inflate a stream/file throught a specified format." in 300 | let man = 301 | [ 302 | `S Manpage.s_description 303 | ; `P 304 | "$(tname) reads from the standard input and writes the \ 305 | deflated/inflated data to the standard output. Several formats \ 306 | exists:" 307 | ; `I 308 | ( "DEFLATE" 309 | , "DEFLATE is a lossless data compression file format that uses a \ 310 | combination of LZ77 and Huffman coding. It is specified in RFC 1951 \ 311 | ." ); `Noblank 312 | ; `I 313 | ( "GZip" 314 | , "GZip is a file format based on the DEFLATE algorithm, which is a \ 315 | combination of LZ77 and Huffman coding. It encodes few informations \ 316 | such as: the timestamp, the filename, or the operating system \ 317 | (which operates the deflation). It generates a CRC-32 checksum at \ 318 | the end of the stream. It is described by the RFC 1952 \ 319 | ." ); `Noblank 320 | ; `I 321 | ( "Zlib" 322 | , "Zlib is an $(i,abstraction) of the DEFLATE algorithm compression \ 323 | algorithm which terminates the stream with an ADLER-32 checksum." ) 324 | ; `Noblank 325 | ; `I 326 | ( "Lempel-Ziv-Overhumer (LZO)" 327 | , "Lempel-Ziv-Oberhumer is a lossless data compression algorithm that \ 328 | is focused on decompression speed." ); `S Manpage.s_examples 329 | ; `P 330 | "This is a small example of how to use $(tname) in your favorite shell:" 331 | ; `Pre 332 | "\\$ $(tname) -f gzip -d < file.gz\n\ 333 | Hello World!\n\ 334 | EOF\n\ 335 | \\$ $(tname) -f gzip < file.gz\n\ 336 | Hello World!\n\ 337 | \\$"; `S Manpage.s_bugs 338 | ; `P "Check bug reports at " 339 | ] in 340 | let term = Term.(ret (const run $ deflate $ format $ level $ input $ output)) 341 | and info = Cmd.info "decompress" ~doc ~man in 342 | Cmd.v info term 343 | 344 | let () = exit (Cmd.eval' command) 345 | -------------------------------------------------------------------------------- /bin/dune: -------------------------------------------------------------------------------- 1 | (executable 2 | (name decompress) 3 | (modules decompress) 4 | (package decompress) 5 | (public_name decompress) 6 | (libraries 7 | checkseum.c 8 | unix 9 | decompress.de 10 | decompress.zl 11 | decompress.gz 12 | decompress.lzo 13 | cmdliner)) 14 | -------------------------------------------------------------------------------- /decompress.opam: -------------------------------------------------------------------------------- 1 | opam-version: "2.0" 2 | name: "decompress" 3 | maintainer: "Romain Calascibetta " 4 | authors: "Romain Calascibetta " 5 | homepage: "https://github.com/mirage/decompress" 6 | bug-reports: "https://github.com/mirage/decompress/issues" 7 | dev-repo: "git+https://github.com/mirage/decompress.git" 8 | doc: "https://mirage.github.io/decompress/" 9 | license: "MIT" 10 | synopsis: "Implementation of Zlib and GZip in OCaml" 11 | description: """Decompress is an implementation of Zlib and GZip in OCaml 12 | 13 | It provides a pure non-blocking interface to inflate and deflate data flow. 14 | """ 15 | 16 | build: [ "dune" "build" "-p" name "-j" jobs ] 17 | run-test: [ "dune" "runtest" "-p" name "-j" jobs ] 18 | 19 | depends: [ 20 | "ocaml" {>= "4.07.0"} 21 | "dune" {>= "2.8.0"} 22 | "cmdliner" {>= "1.1.0"} 23 | "optint" {>= "0.1.0"} 24 | "checkseum" {>= "0.2.0"} 25 | "bigstringaf" {with-test & >= "0.9.0"} 26 | "alcotest" {with-test & >= "1.7.0"} 27 | "fmt" {with-test & >= "0.8.7"} 28 | "camlzip" {>= "1.10" & with-test} 29 | "base64" {>= "3.0.0" & with-test} 30 | "crowbar" {with-test & >= "0.2"} 31 | "rresult" {with-test} 32 | "bos" {with-test & >= "0.2.1"} 33 | "astring" {with-test & >= "0.8.5"} 34 | ] 35 | x-maintenance-intent: [ "(latest)" ] 36 | -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 2.8) 2 | (name decompress) 3 | (version dev) 4 | (cram enable) 5 | -------------------------------------------------------------------------------- /fuzz/dune: -------------------------------------------------------------------------------- 1 | (executable 2 | (name fuzz) 3 | (modules fuzz) 4 | (libraries fmt bigstringaf camlzip checkseum.c de crowbar)) 5 | 6 | (executable 7 | (name fuzz_ns) 8 | (modules fuzz_ns) 9 | (libraries rresult fmt bigstringaf camlzip checkseum.c de crowbar)) 10 | 11 | (executable 12 | (name fuzz_lzo) 13 | (modules fuzz_lzo) 14 | (libraries bigstringaf minilzo lzo crowbar)) 15 | -------------------------------------------------------------------------------- /fuzz/fuzz.ml: -------------------------------------------------------------------------------- 1 | open De 2 | 3 | let w = make_window ~bits:15 4 | let l = Lz77.make_window ~bits:15 5 | let i = bigstring_create io_buffer_size 6 | let o = bigstring_create io_buffer_size 7 | let q = Queue.create 4096 8 | 9 | exception End_of_input 10 | 11 | let zlib bytes = 12 | let off = ref 0 in 13 | let buf = Buffer.create 16 in 14 | try 15 | Zlib.uncompress ~header:false 16 | (fun ibuf -> 17 | if String.length bytes - !off = 0 then raise End_of_input 18 | ; let len = min (Bytes.length ibuf) (String.length bytes - !off) in 19 | Bytes.blit_string bytes !off ibuf 0 len 20 | ; off := !off + len 21 | ; len) 22 | (fun obuf len -> Buffer.add_subbytes buf obuf 0 len) 23 | ; Buffer.contents buf 24 | with 25 | | Zlib.Error _ -> Crowbar.bad_test () 26 | | End_of_input -> Crowbar.bad_test () 27 | 28 | let z bytes = 29 | let decoder = Inf.decoder (`String bytes) ~o ~w in 30 | let buf = Buffer.create 16 in 31 | 32 | let rec go () = 33 | match Inf.decode decoder with 34 | | `Await -> assert false 35 | | `Flush -> 36 | let len = io_buffer_size - Inf.dst_rem decoder in 37 | let res = Bigstringaf.substring o ~off:0 ~len in 38 | Buffer.add_string buf res ; Inf.flush decoder ; go () 39 | | `End -> 40 | let len = io_buffer_size - Inf.dst_rem decoder in 41 | let res = Bigstringaf.substring o ~off:0 ~len in 42 | Buffer.add_string buf res ; Inf.flush decoder ; Buffer.contents buf 43 | | `Malformed err -> Crowbar.fail err in 44 | go () 45 | 46 | let pp_chr = 47 | Fmt.using (function '\032' .. '\126' as x -> x | _ -> '.') Fmt.char 48 | 49 | let pp_scalar : 50 | type buffer. 51 | get:(buffer -> int -> char) -> length:(buffer -> int) -> buffer Fmt.t = 52 | fun ~get ~length ppf b -> 53 | let l = length b in 54 | for i = 0 to l / 16 do 55 | Fmt.pf ppf "%08x: " (i * 16) 56 | ; let j = ref 0 in 57 | while !j < 16 do 58 | if (i * 16) + !j < l then 59 | Fmt.pf ppf "%02x" (Char.code @@ get b ((i * 16) + !j)) 60 | else Fmt.pf ppf " " 61 | ; if !j mod 2 <> 0 then Fmt.pf ppf " " 62 | ; incr j 63 | done 64 | ; Fmt.pf ppf " " 65 | ; j := 0 66 | ; while !j < 16 do 67 | if (i * 16) + !j < l then 68 | Fmt.pf ppf "%a" pp_chr (get b ((i * 16) + !j)) 69 | else Fmt.pf ppf " " 70 | ; incr j 71 | done 72 | ; Fmt.pf ppf "@\n" 73 | done 74 | 75 | let pp_string = pp_scalar ~get:String.get ~length:String.length 76 | 77 | let uniq = 78 | let v = ref (-1) in 79 | fun () -> incr v ; !v 80 | 81 | let ( >>= ) = Crowbar.dynamic_bind 82 | 83 | let random_input_as_inflate () = 84 | Crowbar.add_test ~name:"z/zlib" Crowbar.[range 128 >>= bytes_fixed] 85 | @@ fun bytes -> 86 | let res0 = zlib bytes in 87 | let res1 = z bytes in 88 | 89 | Crowbar.check_eq ~pp:pp_string ~eq:String.equal res0 res1 90 | 91 | let literal chr = `Literal chr 92 | let ( <.> ) f g x = f (g x) 93 | 94 | let gen_cmd = 95 | Crowbar.choose 96 | [ 97 | Crowbar.(map [range 256] (literal <.> Char.chr)) 98 | ; Crowbar.( 99 | map [range 256; range 32768] (fun len off -> `Copy (1 + off, 3 + len))) 100 | ] 101 | 102 | let frequencies_of_cmds cmds = 103 | let literals = make_literals () in 104 | let distances = make_distances () in 105 | List.iter 106 | (function 107 | | `Literal chr -> succ_literal literals chr 108 | | `Copy (off, len) -> 109 | succ_length literals len 110 | ; succ_distance distances off) 111 | cmds 112 | ; literals, distances 113 | 114 | let check_cmds cmds = 115 | let exception Bad in 116 | let write = ref 0 in 117 | try 118 | List.iter 119 | (function 120 | | `Literal _ -> incr write 121 | | `Copy (off, len) -> 122 | if !write - off < 0 then raise_notrace Bad 123 | ; write := !write + len) 124 | cmds 125 | ; true 126 | with Bad -> false 127 | 128 | let apply_cmds cmds = 129 | let buf = Buffer.create 16 in 130 | List.iter 131 | (function 132 | | `Literal chr -> Buffer.add_char buf chr 133 | | `Copy (off, len) -> 134 | for _ = 0 to len - 1 do 135 | Buffer.add_char buf (Buffer.nth buf (Buffer.length buf - off)) 136 | done) 137 | cmds 138 | ; Buffer.contents buf 139 | 140 | let zlib bytes = 141 | let off = ref 0 in 142 | let buf = Buffer.create 16 in 143 | try 144 | Zlib.uncompress ~header:false 145 | (fun ibuf -> 146 | if String.length bytes - !off = 0 then raise End_of_input 147 | ; let len = min (Bytes.length ibuf) (String.length bytes - !off) in 148 | Bytes.blit_string bytes !off ibuf 0 len 149 | ; off := !off + len 150 | ; len) 151 | (fun obuf len -> Buffer.add_subbytes buf obuf 0 len) 152 | ; Buffer.contents buf 153 | with 154 | | Zlib.Error _ -> Crowbar.bad_test () 155 | | End_of_input -> Crowbar.bad_test () 156 | 157 | let pp_chr = 158 | Fmt.using 159 | (function 160 | | '\032' .. '\126' as x -> String.make 1 x 161 | | chr -> Fmt.str "\\%03d" (Char.code chr)) 162 | Fmt.string 163 | 164 | let pp_cmd ppf = function 165 | | `Literal chr -> Fmt.pf ppf "(`Literal %a)" pp_chr chr 166 | | `Copy (off, len) -> Fmt.pf ppf "(`Copy off:%d, len:%d)" off len 167 | 168 | type cmd = [ `Literal of char | `Copy of int * int | `End ] 169 | 170 | let () = 171 | Crowbar.add_test ~name:"z/zlib" [Crowbar.list gen_cmd] @@ fun cmds -> 172 | if not (check_cmds cmds) then Crowbar.bad_test () 173 | ; Queue.reset q 174 | 175 | ; List.iter (Queue.push_exn q <.> Queue.cmd) (cmds :> cmd list) 176 | ; Queue.push_exn q Queue.eob 177 | 178 | ; let expected = apply_cmds cmds in 179 | let buf = Buffer.create 16 in 180 | let literals, distances = frequencies_of_cmds cmds in 181 | let dynamic = Def.dynamic_of_frequencies ~literals ~distances in 182 | let encoder = Def.encoder (`Buffer buf) ~q in 183 | List.iter 184 | (fun v -> 185 | match Def.encode encoder v with 186 | | `Ok -> () 187 | | `Block -> Crowbar.fail "Impossible `Block case" 188 | | `Partial -> Crowbar.fail "Impossible `Partial case") 189 | [`Block {Def.kind= Def.Dynamic dynamic; last= true}; `Flush] 190 | ; let bytes = Buffer.contents buf in 191 | let res0 = zlib bytes in 192 | let res1 = z bytes in 193 | 194 | Crowbar.check_eq ~pp:pp_string ~eq:String.equal expected res0 195 | ; Crowbar.check_eq ~pp:pp_string ~eq:String.equal res0 res1 196 | 197 | let ( <.> ) f g x = f (g x) 198 | 199 | let non_empty_bytes n : string Crowbar.gen = 200 | let open Crowbar in 201 | let ( >>= ) = dynamic_bind in 202 | 203 | let rec go acc = function 204 | | 0 -> concat_gen_list (const "") acc 205 | | n -> go (map [uint8] (String.make 1 <.> Char.chr) :: acc) (pred n) in 206 | let gen n = go [] n in 207 | 208 | range n >>= (gen <.> succ) 209 | 210 | let reconstruct lst = 211 | let len = 212 | List.fold_left 213 | (fun a -> function 214 | | `Literal _ -> 1 + a 215 | | `Copy (_, len) -> len + a 216 | | `End -> a) 217 | 0 lst in 218 | let res = Bytes.create len in 219 | let pos = ref 0 in 220 | List.iter 221 | (function 222 | | `End -> () 223 | | `Literal chr -> Bytes.set res !pos chr ; incr pos 224 | | `Copy (off, len) -> 225 | for _ = 0 to len - 1 do 226 | Bytes.set res !pos (Bytes.get res (!pos - off)) 227 | ; incr pos 228 | done) 229 | lst 230 | ; Bytes.unsafe_to_string res 231 | 232 | let pp_code ppf = function 233 | | `Literal chr -> Fmt.pf ppf "(`Literal %02x:%a)" (Char.code chr) pp_chr chr 234 | | `Copy (off, len) -> Fmt.pf ppf "(`Copy off:%d len:%d)" off len 235 | | `End -> Fmt.pf ppf "`End" 236 | 237 | let () = 238 | Crowbar.add_test ~name:"lz77" [Crowbar.list (non_empty_bytes 1024)] 239 | @@ fun inputs -> 240 | Queue.reset q 241 | ; let state = Lz77.state `Manual ~w:l ~q in 242 | let res = ref [] in 243 | let rec go inputs = 244 | match Lz77.compress state with 245 | | `End -> 246 | let lst = Queue.to_list q in 247 | res := lst :: !res 248 | ; Queue.junk_exn q (List.length lst) 249 | ; List.rev !res 250 | | `Flush -> 251 | let lst = Queue.to_list q in 252 | res := lst :: !res 253 | ; Queue.junk_exn q (List.length lst) 254 | ; go inputs 255 | | `Await -> ( 256 | match inputs with 257 | | [] -> 258 | Lz77.src state Bigstringaf.empty 0 0 259 | ; go [] 260 | | x :: r -> 261 | let x = Bigstringaf.of_string x ~off:0 ~len:(String.length x) in 262 | Lz77.src state x 0 (Bigstringaf.length x) 263 | ; go r) in 264 | let res = go inputs in 265 | let res = List.concat res in 266 | 267 | Crowbar.check_eq ~pp:pp_string ~eq:String.equal ~cmp:String.compare 268 | (String.concat "" inputs) (reconstruct res) 269 | 270 | let split payload = 271 | let res = ref [] in 272 | let tmp = Bytes.create 1024 in 273 | let rec go consumed pos = 274 | if consumed + pos = String.length payload then 275 | if pos = 0 then List.rev !res 276 | else List.rev (Bytes.sub_string tmp 0 pos :: !res) 277 | else if pos = 1024 then ( 278 | res := Bytes.to_string tmp :: !res 279 | ; go (consumed + 1024) 0) 280 | else ( 281 | Bytes.set tmp pos payload.[consumed + pos] 282 | ; go consumed (succ pos)) in 283 | go 0 0 284 | 285 | let () = 286 | Crowbar.add_test ~name:"compress/uncompress" 287 | [Crowbar.list (non_empty_bytes 1024)] 288 | @@ fun inputs -> 289 | Queue.reset q 290 | ; let res = Buffer.create 4096 in 291 | let payloads = ref inputs in 292 | 293 | let flush o len = 294 | for i = 0 to len - 1 do 295 | Buffer.add_char res o.{i} 296 | done in 297 | let refill i = 298 | match !payloads with 299 | | [] -> 0 300 | | data :: rest -> 301 | for x = 0 to String.length data - 1 do 302 | i.{x} <- data.[x] 303 | done 304 | ; payloads := rest 305 | ; String.length data in 306 | Higher.compress ~w:l ~q ~refill ~flush i o 307 | 308 | ; let splits = split (Buffer.contents res) in 309 | Buffer.clear res 310 | ; payloads := splits 311 | 312 | ; let flush o len = 313 | for i = 0 to len - 1 do 314 | Buffer.add_char res o.{i} 315 | done in 316 | let refill i = 317 | match !payloads with 318 | | [] -> 0 319 | | data :: rest -> 320 | for x = 0 to String.length data - 1 do 321 | i.{x} <- data.[x] 322 | done 323 | ; payloads := rest 324 | ; String.length data in 325 | 326 | match Higher.uncompress ~w ~refill ~flush i o with 327 | | Ok () -> 328 | Crowbar.check_eq ~eq:String.equal ~pp:pp_string ~cmp:String.compare 329 | (Buffer.contents res) (String.concat "" inputs) 330 | | Error (`Msg err) -> Crowbar.fail err 331 | 332 | let q = Queue.create 0x10000 333 | 334 | let () = 335 | Crowbar.add_test ~name:"flat compression" 336 | Crowbar.[list1 (range ~min:1 128 >>= bytes_fixed)] 337 | @@ fun inputs -> 338 | let res = Buffer.create 4096 in 339 | let payloads = ref inputs in 340 | let encoder = Def.encoder (`Buffer res) ~q in 341 | 342 | let fill q s = 343 | for i = 0 to String.length s - 1 do 344 | Queue.push_exn q (Queue.literal s.[i]) 345 | done in 346 | 347 | let rec go last = function 348 | | `Ok when last -> Buffer.contents res 349 | | `Ok -> ( 350 | match !payloads with 351 | | [] -> assert false 352 | | x :: r -> 353 | fill q x 354 | ; payloads := r 355 | ; go last (Def.encode encoder `Flush)) 356 | | `Partial -> assert false 357 | | `Block -> 358 | let last = List.length !payloads <= 0 in 359 | go last (Def.encode encoder (`Block {Def.kind= Def.Flat; last})) in 360 | let res0 = 361 | go false (Def.encode encoder (`Block {Def.kind= Def.Flat; last= false})) 362 | in 363 | let buf1 = Buffer.create 4096 in 364 | 365 | let flush b l = 366 | for i = 0 to l - 1 do 367 | Buffer.add_char buf1 b.{i} 368 | done in 369 | match Higher.of_string ~o ~w res0 ~flush with 370 | | Ok () -> 371 | Crowbar.check_eq ~eq:String.equal ~pp:pp_string ~cmp:String.compare 372 | (Buffer.contents buf1) (String.concat "" inputs) 373 | | Error (`Msg err) -> Crowbar.fail err 374 | -------------------------------------------------------------------------------- /fuzz/fuzz_lzo.ml: -------------------------------------------------------------------------------- 1 | open Crowbar 2 | 3 | let ( <.> ) f g x = f (g x) 4 | 5 | let non_empty_bytes n : string Crowbar.gen = 6 | let open Crowbar in 7 | let ( >>= ) = dynamic_bind in 8 | 9 | let rec go acc = function 10 | | 0 -> concat_gen_list (const "") acc 11 | | n -> go (map [uint8] (String.make 1 <.> Char.chr) :: acc) (pred n) in 12 | let gen n = go [] n in 13 | 14 | range n >>= (gen <.> succ) 15 | 16 | let wrkmem = Lzo.make_wrkmem () 17 | 18 | let () = 19 | add_test ~name:"lzo/minilzo" [non_empty_bytes 256] @@ fun str -> 20 | let bstr = Bigstringaf.of_string str ~off:0 ~len:(String.length str) in 21 | let output = Bigstringaf.create 65536 in 22 | let len = Lzo.compress bstr output wrkmem in 23 | let buf = Bytes.create (String.length str) in 24 | let len = 25 | Minilzo.uncompress 26 | ~src:(Bigstringaf.substring output ~off:0 ~len) 27 | ~src_off:0 ~src_len:len ~dst:buf ~dst_off:0 in 28 | check_eq str (Bytes.sub_string buf 0 len) 29 | 30 | let () = 31 | add_test ~name:"minilzo/lzo" [non_empty_bytes 256] @@ fun str -> 32 | let buf = Bytes.create 65536 in 33 | let len = 34 | Minilzo.compress ~src:str ~src_off:0 ~src_len:(String.length str) ~dst:buf 35 | ~dst_off:0 in 36 | let bstr = Bigstringaf.of_string (Bytes.unsafe_to_string buf) ~off:0 ~len in 37 | match Lzo.uncompress_with_buffer bstr with 38 | | Ok str' -> check_eq str str' 39 | | Error _ -> Crowbar.fail "Invalid output of minilzo" 40 | -------------------------------------------------------------------------------- /fuzz/fuzz_ns.ml: -------------------------------------------------------------------------------- 1 | open De 2 | 3 | let w = make_window ~bits:15 4 | let i = bigstring_create 1024 5 | let o = bigstring_create (Def.Ns.compress_bound 1024) 6 | 7 | let pp_chr = 8 | Fmt.using (function '\032' .. '\126' as x -> x | _ -> '.') Fmt.char 9 | 10 | let pp_scalar : 11 | type buffer. 12 | get:(buffer -> int -> char) -> length:(buffer -> int) -> buffer Fmt.t = 13 | fun ~get ~length ppf b -> 14 | let l = length b in 15 | for i = 0 to l / 16 do 16 | Fmt.pf ppf "%08x: " (i * 16) 17 | ; let j = ref 0 in 18 | while !j < 16 do 19 | if (i * 16) + !j < l then 20 | Fmt.pf ppf "%02x" (Char.code @@ get b ((i * 16) + !j)) 21 | else Fmt.pf ppf " " 22 | ; if !j mod 2 <> 0 then Fmt.pf ppf " " 23 | ; incr j 24 | done 25 | ; Fmt.pf ppf " " 26 | ; j := 0 27 | ; while !j < 16 do 28 | if (i * 16) + !j < l then 29 | Fmt.pf ppf "%a" pp_chr (get b ((i * 16) + !j)) 30 | else Fmt.pf ppf " " 31 | ; incr j 32 | done 33 | ; Fmt.pf ppf "@\n" 34 | done 35 | 36 | let pp_string = pp_scalar ~get:String.get ~length:String.length 37 | let ( >>= ) = Crowbar.dynamic_bind 38 | let ( <.> ) f g x = f (g x) 39 | 40 | let non_empty_bytes n : string Crowbar.gen = 41 | let open Crowbar in 42 | let ( >>= ) = dynamic_bind in 43 | 44 | let rec go acc = function 45 | | 0 -> concat_gen_list (const "") acc 46 | | n -> go (map [uint8] (String.make 1 <.> Char.chr) :: acc) (pred n) in 47 | let gen n = go [] n in 48 | 49 | range n >>= (gen <.> succ) 50 | 51 | let () = 52 | Crowbar.add_test ~name:"compress ns/uncompress ns" [non_empty_bytes 1024] 53 | @@ fun input -> 54 | let len = String.length input in 55 | Bigstringaf.blit_from_string input ~src_off:0 i ~dst_off:0 ~len 56 | ; let src_def = Bigstringaf.sub i ~off:0 ~len in 57 | let res = Def.Ns.deflate src_def o in 58 | let res = Rresult.R.get_ok res in 59 | let src_inf = Bigstringaf.sub o ~off:0 ~len:res in 60 | let res = Inf.Ns.inflate src_inf i in 61 | match res with 62 | | Ok (_, res) -> 63 | let output = Bigstringaf.sub i ~off:0 ~len:res in 64 | Crowbar.check_eq ~eq:String.equal ~pp:pp_string ~cmp:String.compare 65 | (Bigstringaf.to_string output) 66 | input 67 | | Error _err -> Crowbar.fail "iso fail" 68 | -------------------------------------------------------------------------------- /lib/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name de) 3 | (public_name decompress.de) 4 | (modules de) 5 | (ocamlopt_flags -O3 -unbox-closures -unbox-closures-factor 20) 6 | (libraries optint checkseum)) 7 | 8 | (library 9 | (name lz) 10 | (public_name decompress.lz) 11 | (modules lz) 12 | (ocamlopt_flags -O3 -unbox-closures -unbox-closures-factor 20) 13 | (libraries optint checkseum de)) 14 | 15 | (library 16 | (name rfc1951) 17 | (public_name rfc1951) 18 | (modules rfc1951) 19 | (libraries optint checkseum)) 20 | 21 | (rule 22 | (deps de.ml) 23 | (targets rfc1951.ml) 24 | (action 25 | (copy de.ml rfc1951.ml))) 26 | 27 | (library 28 | (name zl) 29 | (public_name decompress.zl) 30 | (modules zl) 31 | (libraries optint checkseum de)) 32 | 33 | (library 34 | (name gz) 35 | (public_name decompress.gz) 36 | (modules gz) 37 | (libraries optint checkseum de)) 38 | 39 | (library 40 | (name lzo) 41 | (public_name decompress.lzo) 42 | (modules lzo)) 43 | 44 | (alias 45 | (name default) 46 | (package decompress)) 47 | -------------------------------------------------------------------------------- /lib/gz.mli: -------------------------------------------------------------------------------- 1 | (** {1 GZIP layer.} 2 | 3 | GZIP is a standard on top of RFC1951 according RFC1952. It uses the {!De} 4 | implementation with the LZ77 compression algorithm. Module provides 5 | non-blocking streaming codec to {{:#decode}decode} and {{:#encode}encode} 6 | GZIP encoding. It can efficiently work payload by payload without blocking IO. *) 7 | 8 | type bigstring = 9 | (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t 10 | (** Type type for [bigstring]. *) 11 | 12 | type window = De.window 13 | (** The type for sliding window. *) 14 | 15 | val io_buffer_size : int 16 | 17 | (** The type for Operating-System. *) 18 | type os = 19 | | FAT 20 | | Amiga 21 | | VMS 22 | | Unix 23 | | VM 24 | | Atari 25 | | HPFS 26 | | Macintosh 27 | | Z 28 | | CPM 29 | | TOPS20 30 | | NTFS 31 | | QDOS 32 | | Acorn 33 | | Unknown 34 | 35 | val pp_os : Format.formatter -> os -> unit 36 | (** Pretty-printer of {!os}. *) 37 | 38 | val equal_os : os -> os -> bool 39 | (** [equal_os a b] returns [true] if [a] is exactly the same {!os} than [b]. 40 | Otherwise, it returns [false]. *) 41 | 42 | (** {2:decode GZIP Decoder.} 43 | 44 | Unlike [de], [gz] provides a referentially transparent {!Inf.decoder}. The 45 | client must use a {!Inf.decoder} given {b by} {!Inf.decode} instead of a 46 | decoder given {b to} {!Inf.decode}. A common use of [gz] is: 47 | 48 | {[ 49 | let rec go d0 = match Inf.decode d0 with 50 | | `Await d1 -> ... go d1 51 | | `Flush d1 -> ... go d1 52 | | _ -> .... in 53 | ]} *) 54 | 55 | module Inf : sig 56 | type decoder 57 | (** The type for decoders. *) 58 | 59 | type src = [ `Channel of in_channel | `String of string | `Manual ] 60 | (** The type for input sources. With a [`Manual] source the client must 61 | provide input with {!src}. With [`String] or [`Channel] source the client 62 | can safely discard [`Await] case (with [assert false]). *) 63 | 64 | type signal = 65 | [ `Await of decoder 66 | | `Flush of decoder 67 | | `End of decoder 68 | | `Malformed of string ] 69 | 70 | val decoder : src -> o:bigstring -> decoder 71 | (** [decoder src ~o] is a decoder that inputs from [src]. 72 | 73 | {b Output buffer.} 74 | 75 | [gz], as [de], uses [o] buffer as internal buffer to store output. We 76 | recommend to allocate an {!io_buffer_size} buffer as output buffer. Then, 77 | {!dst_rem}[ decoder] tells you how many unused bytes remain in [o]. *) 78 | 79 | val decode : decoder -> signal 80 | (** [decode d0] is: 81 | 82 | {ul 83 | {- [`Await d1] if [d0] has a [`Manual] input source and awaits for more 84 | input. The client must use a {!src} with [d1] to provide it.} 85 | {- [`Flush d1] if given output buffer [o] (see {!decoder}) needs to be 86 | drained before work can be resumed. The client must use {!flush} with [d1] 87 | to {b completely} flush [o]. Usually [o] will be full and consist fully of 88 | bytes that need to be copied from the buffer, but sometimes only the first 89 | part of the buffer is used. In those cases {!dst_rem} will give you the 90 | amount of free/unused bytes remain in [o]. These should {b not} be copied 91 | since their contents are not part of the output. Instead, the first 92 | [bigstring_length o - Inf.dst_rem d1] bytes should be copied when flushing 93 | [o].} 94 | {- [`Malformed err] if given input is malformed. [err] is a human-readable 95 | error message.} 96 | {- [`End d1] if given input notify end of flow. [o] is possibly not empty 97 | (it can be check with {!dst_rem}).}} *) 98 | 99 | val reset : decoder -> decoder 100 | (** [reset d] is a [d] in its original state, as it was initialized by 101 | {!decoder}. *) 102 | 103 | val src : decoder -> bigstring -> int -> int -> decoder 104 | (** [src d s j l] provides [d] with [l] bytes to read, starting at [j] in [s]. 105 | This byte range is read by calls to {!decode} with [d] until [`Await] is 106 | returned. To signal the end of input call the function with [l = 0]. 107 | 108 | @raise Invalid_argument when [j] and [l] do not correspond to a valid 109 | range. *) 110 | 111 | val dst_rem : decoder -> int 112 | (** [dst_rem d] is how many unused bytes remain in the output buffer of [d]. *) 113 | 114 | val src_rem : decoder -> int 115 | (** [src_rem d] is how many unprocessed bytes remain in the input buffer of 116 | [d]. *) 117 | 118 | val write : decoder -> int 119 | (** [write d] is how many bytes [d] emitted since it was created. *) 120 | 121 | val flush : decoder -> decoder 122 | (** [flush d] is a decoder where internal output buffer [o] is {b completely} 123 | free to store bytes. *) 124 | 125 | val filename : decoder -> string option 126 | (** [filename d] returns the {i filename} of the flow if it exists. This can 127 | be called anytime but should be called when the [`End] case appears (and 128 | ensure that the GZIP header was computed). *) 129 | 130 | val comment : decoder -> string option 131 | (** [comment d] returns the {i comment} of the flow if it exists. This can be 132 | called anytime but should be called when the [`End] case appears (and 133 | ensure that the GZIP header was computed). *) 134 | 135 | val os : decoder -> os 136 | (** [os d] returns the {!os} where the flow was compressed. It should be 137 | called when the [`End] case appears (and ensure that the GZIP header was 138 | computed). *) 139 | 140 | val extra : key:string -> decoder -> string option 141 | (** [extra ~key d] returns extra {i field} [key] if it exists. This can be 142 | called anytime but should be called when the [`End] case appears (and 143 | ensure that the GZIP header was computed). 144 | 145 | @raise Invalid_argument if the length of the given [key] is not equal to 2. *) 146 | end 147 | 148 | (** {2:encode GZIP Encoder.} 149 | 150 | GZIP encoder is glue between the LZ77 algorithm and the DEFLATE encoder, 151 | prefixed with a GZIP header. Any deal with compression algorithm is not 152 | possible on this layer (see {!De} for more details). As {!Inf}, and unlike 153 | {!De}, {!Gz} provides a referentially transparent encoder. 154 | 155 | The client must use the {!Def.encoder} given {b by} {!Def.encode} instead a 156 | [encoder] given {b to} {!Def.encode}. *) 157 | 158 | module Def : sig 159 | type src = [ `Channel of in_channel | `String of string | `Manual ] 160 | (** The type for input sources. With a [`Manual] source the client must 161 | provide input with {!src}. With [`String] or [`Channel] source the client 162 | can safely discard [`Await] case (with [assert false]). *) 163 | 164 | type dst = [ `Channel of out_channel | `Buffer of Buffer.t | `Manual ] 165 | (** The type for output destinations. With a [`Manual] destination the client 166 | must provide output storage with {!dst}. With [`Buffer] or [`Channel] 167 | destination the client can safely discard [`Flush] case (with [assert 168 | false]). *) 169 | 170 | type encoder 171 | (** The type for GZIP encoders. *) 172 | 173 | type ret = [ `Await of encoder | `End of encoder | `Flush of encoder ] 174 | 175 | val encoder : 176 | src 177 | -> dst 178 | -> ?ascii:bool 179 | -> ?hcrc:bool 180 | -> ?filename:string 181 | -> ?comment:string 182 | -> mtime:int32 183 | -> os 184 | -> q:De.Queue.t 185 | -> w:De.Lz77.window 186 | -> level:int 187 | -> encoder 188 | (** [encoder src dst ~mtime os ~q ~w ~level] is an encoder that inputs from 189 | [src] and that outputs to [dst]. 190 | 191 | {b Internal queue.} 192 | 193 | [encoder] deals internally with compression algorithm and DEFLATE encoder. 194 | To pass compression values to DEFLATE encoder, we need a queue [q]. Length 195 | of [q] has an impact on performance, and small lengths can be a bottleneck, 196 | leading {!encode} to emit many [`Flush]. We recommend a que as large as 197 | output buffer. 198 | 199 | {b Window.} 200 | 201 | GZIP needs a sliding window to operate the LZ77 compression. The window 202 | must be a {i 32k} window ({!De.make_window} with [bits = 15]). The 203 | allocated window can be re-used by an other inflation/deflation process - 204 | but it {b can not} be re-used concurrently or cooperatively with another 205 | inflation/deflation process. 206 | 207 | {b Level.} 208 | 209 | Current implementation of GZIP does not handle any compression level. 210 | However, the client must give a level between 0 and 9, inclusively, 211 | Otherwise, we raise an [Invalid_argument]. 212 | 213 | {b Metadata.} 214 | 215 | Client is able to store some {i metadata} such as: 216 | {ul 217 | {- [mtime] time of last modification of the input.} 218 | {- [os] {!os} which did the compression.} 219 | {- [filename] {i filename} of the input (no limitation about length).} 220 | {- [comment] an arbitrary {i payload} (no limitation about length).} 221 | {- [ascii] if encoding of contents is ASCII.} 222 | {- [hcrc] if the client wants a checksum of the GZIP header.}} *) 223 | 224 | val src_rem : encoder -> int 225 | (** [src_rem e] is how many bytes it remains in given input buffer. *) 226 | 227 | val dst_rem : encoder -> int 228 | (** [dst_rem e] is how many unused bytes remain in the output buffer of [e]. *) 229 | 230 | val src : encoder -> bigstring -> int -> int -> encoder 231 | (** [src e s j l] provides [e] with [l] bytes to read, starting at [j] in [s]. 232 | This byte range is read by calls to {!encode} with [e] until [`Await] is 233 | returned. To signal the end of input call the function with [l = 0]. 234 | 235 | @raise Invalid_argument when [j] and [l] do not correspond to a valid 236 | range. *) 237 | 238 | val dst : encoder -> bigstring -> int -> int -> encoder 239 | (** [dst e s j l] provides [e] with [l] bytes available to write, starting at 240 | [j] in [s]. This byte range is fill by calls to {!encode} with [e] until 241 | [`Flush] is returned. 242 | 243 | @raise Invalid_argument when [j] and [l] do not correspond to a valid 244 | range. *) 245 | 246 | val encode : encoder -> ret 247 | (** [encode e0] is: 248 | 249 | {ul 250 | {- [`Await e1] if [e0] has a [`Manual] input source and awaits for more 251 | input. The client must use {!src} with [e1] to provide it.} 252 | {- [`Flush e1] if [e0] has a [`Manual] destination and needs more output 253 | storage. The client must drain the buffer before resuming operation.} 254 | {- [`End e1] if [e0] encoded all input. Output buffer is possibly not 255 | empty (it can be check with {!dst_rem}).}} 256 | 257 | {b Limitation.} 258 | 259 | The encoder must manipulate an output buffer of, at least, 2 bytes. If it's 260 | not the case, [encode] does nothing - and it tells you nothing more than it 261 | did nothing. Depending on what you do, a loop can infinitely call [encode] 262 | without any updates until the given output still has less than 2 bytes. 263 | *) 264 | end 265 | 266 | module Higher : sig 267 | type 't configuration 268 | (** Type of the Operating-System configuration. *) 269 | 270 | val configuration : 271 | ?ascii:bool -> ?hcrc:bool -> os -> ('t -> int32) -> 't configuration 272 | (** [configuration ?ascii ?hcrc os mtime] makes an Operating-System 273 | {!configuration} to be able to {!compress} any inputs. *) 274 | 275 | val compress : 276 | ?level:int 277 | -> ?filename:string 278 | -> ?comment:string 279 | -> w:De.Lz77.window 280 | -> q:De.Queue.t 281 | -> refill:(bigstring -> int) 282 | -> flush:(bigstring -> int -> unit) 283 | -> 't 284 | -> 't configuration 285 | -> bigstring 286 | -> bigstring 287 | -> unit 288 | (** [compress ?level ?filename ?comment ~w ~q ~refill ~flush time cfg i o] compresses 289 | an input given by [refill] and outputs it via [flush]. It requires: 290 | - a queue [q] which is shared between the compression algorithm and 291 | the encoder. The length of it can be a bottleneck on the throughput 292 | - a {i window} to be able to lookup repeated patterns 293 | - a {i witness} required by the given [cfg] 294 | - a {!configuration} value 295 | - [i] is the input buffer 296 | - [o] is the output buffer 297 | 298 | When [compress] wants more input, it calls [refill] with [i]. The client 299 | returns how many bytes he wrotes into [i]. If he returns 0, he signals end 300 | of input. 301 | 302 | When [compress] has written output buffer, it calls [flush] with [o] and 303 | how many bytes it wrote. Bytes into [o] must be {b copied} and they will be 304 | lost at the next call to [flush]. 305 | 306 | A simple example of how to use such interface (with [unix]) is: 307 | {[ 308 | let time () = Int32.of_float (Unix.gettimeofday ()) 309 | 310 | let deflate_string ?(level= 4) str = 311 | let i = De.bigstring_create De.io_buffer_size in 312 | let o = De.bigstring_create De.io_buffer_size in 313 | let w = De.Lz77.make_window ~bits:15 in 314 | let q = De.Queue.create 0x1000 in 315 | let r = Buffer.create 0x1000 in 316 | let p = ref 0 in 317 | let cfg = Gz.Higher.configuration Gz.Unix time in 318 | let refill buf = 319 | let len = min (String.length str - !p) De.io_buffer_size in 320 | Bigstringaf.blit_from_string str ~src_off:!p buf ~dst_off:0 ~len ; 321 | p := !p + len ; len in 322 | let flush buf len = 323 | let str = Bigstringaf.substring buf ~off:0 ~len in 324 | Buffer.add_string r str in 325 | Gz.Higher.compress ~w ~q ~level ~refill ~flush () cfg i o ; Buffer.contents r 326 | ]} 327 | 328 | As {!De.Higher.compress} or {!Zl.Higher.compress}, [decompress] don't want 329 | to take the responsability of such function. It's why this function exists 330 | only as an example. Especially since a GZip compression requires an Unix 331 | {i syscall} (see [Unix.gettimeofday]) which does not exists on some contexts 332 | such as MirageOS. 333 | 334 | The speed and the compression ratio depends on the length of the given {i window} 335 | and the given {!De.Queue.t}. They can be a serious bottleneck on the throughput. 336 | Due to all of these choices, we show this function as an example - but it should 337 | not be copied as is! *) 338 | 339 | type metadata = { 340 | filename: string option 341 | ; comment: string option 342 | ; os: os 343 | ; extra: key:string -> string option 344 | } 345 | (** Type of {i metadata} available into a GZIP flow. *) 346 | 347 | val uncompress : 348 | refill:(bigstring -> int) 349 | -> flush:(bigstring -> int -> unit) 350 | -> bigstring 351 | -> bigstring 352 | -> (metadata, [> `Msg of string ]) result 353 | (** [uncompress ~refill ~flush i o] uncompresses an input given by [refill] 354 | and outputs it via [flush]. It requires: 355 | - [i] as the input buffer 356 | - [o] as the output buffer 357 | 358 | It returns then extracted {i metadata} from the given GZip flow. 359 | 360 | When [uncompress] wants more input, it calls [refill] with [i]. The client 361 | returns how many bytes he wrote into [i]. If he returns 0, he signals end 362 | of input. 363 | 364 | When [uncompress] has written output buffer, it calls [flush] with [o] and 365 | how many bytes it wrote. Bytes into [o] must be {b copied} and tjey will be 366 | lost at the next call to [flush]. 367 | 368 | A simple example of how to use such interface is: 369 | {[ 370 | let inflate_string str = 371 | let i = De.bigstring_create De.io_buffer_size in 372 | let o = De.bigstring_create De.io_buffer_size in 373 | let r = Buffer.create 0x1000 in 374 | let p = ref 0 in 375 | let refill buf = 376 | let len = min (String.length str - !p) De.io_buffer_size in 377 | Bigstringaf.blit_from_string str ~src_off:!p buf ~dst_off:0 ~len ; 378 | p := !p + len ; len in 379 | let flush buf len = 380 | let str = Bigstringaf.substring buf ~off:0 ~len in 381 | Buffer.add_string r str in 382 | match Gz.Higher.uncompress ~refill ~flush i o with 383 | | Ok m -> Ok (m, Buffer.contents r) 384 | | Error _ as err -> err 385 | ]} 386 | 387 | As {!De.Higher.uncompress} or {!Zl.Higher.uncompress}, [decompress] does not 388 | want to take the responsability of such implementation when several choices was 389 | made depending on the context. Indeed, [i] and [o] can be a serious bottleneck 390 | on the throughput. The choice of [Buffer] can be replaced by something else such 391 | as a queue or a {i ropes}. All of these choices should be made by the client. 392 | It's why we provide such function only as an example. 393 | *) 394 | end 395 | -------------------------------------------------------------------------------- /lib/lz.ml: -------------------------------------------------------------------------------- 1 | [@@@landmark "auto"] 2 | 3 | type bigstring = 4 | (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t 5 | 6 | let bigstring_empty = Bigarray.Array1.create Bigarray.char Bigarray.c_layout 0 7 | let bigstring_length x = Bigarray.Array1.dim x [@@inline] 8 | 9 | let bigstring_create l = 10 | Bigarray.Array1.create Bigarray.char Bigarray.c_layout l 11 | 12 | let invalid_arg fmt = Format.kasprintf invalid_arg fmt 13 | 14 | let invalid_bounds off len = 15 | invalid_arg "Out of bounds (off: %d, len: %d)" off len 16 | 17 | external unsafe_get_uint8 : bigstring -> int -> int = "%caml_ba_ref_1" 18 | external unsafe_get_char : bigstring -> int -> char = "%caml_ba_ref_1" 19 | external unsafe_set_uint8 : bigstring -> int -> int -> unit = "%caml_ba_set_1" 20 | external unsafe_get_uint16 : bigstring -> int -> int = "%caml_bigstring_get16" 21 | external unsafe_get_uint32 : bigstring -> int -> int32 = "%caml_bigstring_get32" 22 | 23 | external unsafe_set_uint32 : bigstring -> int -> int32 -> unit 24 | = "%caml_bigstring_set32" 25 | 26 | external bytes_unsafe_get_uint32 : bytes -> int -> int32 = "%caml_bytes_get32" 27 | 28 | let bytes_unsafe_get_uint8 : bytes -> int -> int = 29 | fun buf off -> Char.code (Bytes.get buf off) 30 | 31 | let input_bigstring ic buf off len = 32 | let tmp = Bytes.create len in 33 | let res = input ic tmp 0 len in 34 | 35 | let len0 = res land 3 in 36 | let len1 = res asr 2 in 37 | 38 | for i = 0 to len1 - 1 do 39 | let i = i * 4 in 40 | let v = bytes_unsafe_get_uint32 tmp i in 41 | unsafe_set_uint32 buf (off + i) v 42 | done 43 | 44 | ; for i = 0 to len0 - 1 do 45 | let i = (len1 * 4) + i in 46 | let v = bytes_unsafe_get_uint8 tmp i in 47 | unsafe_set_uint8 buf (off + i) v 48 | done 49 | ; res 50 | 51 | external string_unsafe_get_uint32 : string -> int -> int32 52 | = "%caml_string_get32" 53 | 54 | let string_unsafe_get_uint8 : string -> int -> int = 55 | fun buf off -> Char.code buf.[off] 56 | 57 | let bigstring_of_string v = 58 | let len = String.length v in 59 | let res = bigstring_create len in 60 | let len0 = len land 3 in 61 | let len1 = len asr 2 in 62 | 63 | for i = 0 to len1 - 1 do 64 | let i = i * 4 in 65 | let v = string_unsafe_get_uint32 v i in 66 | unsafe_set_uint32 res i v 67 | done 68 | 69 | ; for i = 0 to len0 - 1 do 70 | let i = (len1 * 4) + i in 71 | let v = string_unsafe_get_uint8 v i in 72 | unsafe_set_uint8 res i v 73 | done 74 | ; res 75 | 76 | let _length = 77 | [| 78 | 0; 1; 2; 3; 4; 5; 6; 7; 8; 8; 9; 9; 10; 10; 11; 11; 12; 12; 12; 12; 13; 13 79 | ; 13; 13; 14; 14; 14; 14; 15; 15; 15; 15; 16; 16; 16; 16; 16; 16; 16; 16; 17 80 | ; 17; 17; 17; 17; 17; 17; 17; 18; 18; 18; 18; 18; 18; 18; 18; 19; 19; 19; 19 81 | ; 19; 19; 19; 19; 20; 20; 20; 20; 20; 20; 20; 20; 20; 20; 20; 20; 20; 20; 20 82 | ; 20; 21; 21; 21; 21; 21; 21; 21; 21; 21; 21; 21; 21; 21; 21; 21; 21; 22; 22 83 | ; 22; 22; 22; 22; 22; 22; 22; 22; 22; 22; 22; 22; 22; 22; 23; 23; 23; 23; 23 84 | ; 23; 23; 23; 23; 23; 23; 23; 23; 23; 23; 23; 24; 24; 24; 24; 24; 24; 24; 24 85 | ; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24 86 | ; 24; 24; 24; 24; 24; 25; 25; 25; 25; 25; 25; 25; 25; 25; 25; 25; 25; 25; 25 87 | ; 25; 25; 25; 25; 25; 25; 25; 25; 25; 25; 25; 25; 25; 25; 25; 25; 25; 25; 26 88 | ; 26; 26; 26; 26; 26; 26; 26; 26; 26; 26; 26; 26; 26; 26; 26; 26; 26; 26; 26 89 | ; 26; 26; 26; 26; 26; 26; 26; 26; 26; 26; 26; 26; 27; 27; 27; 27; 27; 27; 27 90 | ; 27; 27; 27; 27; 27; 27; 27; 27; 27; 27; 27; 27; 27; 27; 27; 27; 27; 27; 27 91 | ; 27; 27; 27; 27; 27; 28 92 | |] 93 | 94 | let _distance = 95 | [| 96 | 0; 1; 2; 3; 4; 4; 5; 5; 6; 6; 6; 6; 7; 7; 7; 7; 8; 8; 8; 8; 8; 8; 8; 8; 9; 9 97 | ; 9; 9; 9; 9; 9; 9; 10; 10; 10; 10; 10; 10; 10; 10; 10; 10; 10; 10; 10; 10 98 | ; 10; 10; 11; 11; 11; 11; 11; 11; 11; 11; 11; 11; 11; 11; 11; 11; 11; 11; 12 99 | ; 12; 12; 12; 12; 12; 12; 12; 12; 12; 12; 12; 12; 12; 12; 12; 12; 12; 12; 12 100 | ; 12; 12; 12; 12; 12; 12; 12; 12; 12; 12; 12; 12; 13; 13; 13; 13; 13; 13; 13 101 | ; 13; 13; 13; 13; 13; 13; 13; 13; 13; 13; 13; 13; 13; 13; 13; 13; 13; 13; 13 102 | ; 13; 13; 13; 13; 13; 13; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14 103 | ; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14 104 | ; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14 105 | ; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14; 14; 15; 15; 15; 15; 15; 15 106 | ; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15 107 | ; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15 108 | ; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15; 15 109 | ; 15; 0; 0; 16; 17; 18; 18; 19; 19; 20; 20; 20; 20; 21; 21; 21; 21; 22; 22 110 | ; 22; 22; 22; 22; 22; 22; 23; 23; 23; 23; 23; 23; 23; 23; 24; 24; 24; 24; 24 111 | ; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 25; 25; 25; 25; 25; 25; 25; 25 112 | ; 25; 25; 25; 25; 25; 25; 25; 25; 26; 26; 26; 26; 26; 26; 26; 26; 26; 26; 26 113 | ; 26; 26; 26; 26; 26; 26; 26; 26; 26; 26; 26; 26; 26; 26; 26; 26; 26; 26; 26 114 | ; 26; 26; 27; 27; 27; 27; 27; 27; 27; 27; 27; 27; 27; 27; 27; 27; 27; 27; 27 115 | ; 27; 27; 27; 27; 27; 27; 27; 27; 27; 27; 27; 27; 27; 27; 27; 28; 28; 28; 28 116 | ; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28 117 | ; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28 118 | ; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28; 28 119 | ; 28; 28; 28; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29 120 | ; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29 121 | ; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29 122 | ; 29; 29; 29; 29; 29; 29; 29; 29; 29; 29 123 | |] 124 | 125 | let _distance code = 126 | if code < 256 then _distance.(code) else _distance.(256 + (code lsr 7)) 127 | [@@inline] 128 | 129 | let _max_match = 258 130 | let _min_match = 3 131 | let _min_lookahead = _max_match + _min_match + 1 132 | let ( .!() ) buf pos = unsafe_get_uint32 buf pos 133 | let ( .![] ) buf pos = unsafe_get_uint16 buf pos 134 | let ( .!{} ) buf pos = unsafe_get_uint8 buf pos 135 | 136 | type configuration = { 137 | max_chain: int 138 | ; max_lazy: int 139 | ; good_length: int 140 | ; nice_length: int 141 | } 142 | 143 | let _4 = {good_length= 4; max_lazy= 4; nice_length= 16; max_chain= 16} 144 | let _5 = {good_length= 8; max_lazy= 16; nice_length= 32; max_chain= 32} 145 | let _6 = {good_length= 8; max_lazy= 16; nice_length= 128; max_chain= 128} 146 | let _7 = {good_length= 8; max_lazy= 32; nice_length= 128; max_chain= 256} 147 | let _8 = {good_length= 32; max_lazy= 128; nice_length= 258; max_chain= 1024} 148 | let _9 = {good_length= 32; max_lazy= 258; nice_length= 258; max_chain= 4096} 149 | let _mem_level = 8 (* default *) 150 | let _hash_bits = _mem_level + 7 151 | let _hash_size = 1 lsl _hash_bits 152 | let _hash_mask = _hash_size - 1 153 | let _hash_shift = (_hash_bits + _min_match - 1) / _min_match 154 | let _too_far = 4096 155 | let update_hash hash chr = (hash lsl _hash_shift) lxor chr land _hash_mask 156 | 157 | type src = [ `Channel of in_channel | `String of string | `Manual ] 158 | type decode = [ `Await | `Flush | `End ] 159 | type literals = De.literals 160 | type distances = De.distances 161 | type optint = Optint.t 162 | 163 | type state = { 164 | src: src 165 | ; cfg: configuration 166 | ; mutable i: bigstring 167 | ; mutable i_pos: int 168 | ; mutable i_len: int 169 | ; l: literals 170 | ; d: distances 171 | ; w: bigstring 172 | ; wbits: int 173 | ; mutable lookahead: int 174 | ; mutable strstart: int 175 | ; prev: int array 176 | ; head: int array 177 | ; mutable hash: int 178 | ; mutable match_start: int 179 | ; mutable match_length: int 180 | ; mutable match_available: bool 181 | ; mutable insert: int 182 | ; mutable prev_length: int 183 | ; mutable prev_match: int 184 | ; q: De.Queue.t 185 | ; mutable crc: optint 186 | ; mutable k: configuration -> state -> decode 187 | } 188 | 189 | let max_dist s = (1 lsl s.wbits) - _min_lookahead 190 | 191 | exception Break 192 | 193 | (* cur is the head of the hash chain for the current string 194 | * and its distance is <= _max_dist 195 | * prev_length >= 1 196 | * len >= _min_lookahead *) 197 | let longest_match cfg s cur_match = 198 | let wsize = 1 lsl s.wbits in 199 | let wmask = wsize - 1 in 200 | let str_end = s.strstart + (_max_match - 1) in 201 | let limit = if s.strstart > max_dist s then s.strstart - max_dist s else 0 in 202 | 203 | (* Stop when !cur becomes <= limit. To somplify the code, 204 | * we prevent matches with the string of window index 0. *) 205 | let cur_match = ref cur_match in 206 | (* current match *) 207 | let chain_length = 208 | ref 209 | (if s.prev_length >= cfg.good_length then cfg.max_chain asr 2 210 | else cfg.max_chain) in 211 | (* max hash chain length *) 212 | let scan = ref s.strstart in 213 | (* current string *) 214 | let scan_start = s.w.![s.strstart] in 215 | let scan_end = ref s.w.![s.strstart + s.prev_length - 1] in 216 | let best_len = ref s.prev_length in 217 | 218 | (* best match length so far *) 219 | (try 220 | while 221 | let match' = ref !cur_match in 222 | if 223 | s.w.![!match' + !best_len - 1] <> !scan_end 224 | || s.w.![!match'] <> scan_start 225 | then begin 226 | cur_match := s.prev.(!cur_match land wmask) 227 | ; decr chain_length 228 | ; !cur_match > limit && !chain_length != 0 229 | end 230 | else begin 231 | incr scan 232 | ; incr match' 233 | ; while !scan < str_end && s.w.!(!scan) = s.w.!(!match') do 234 | scan := !scan + 4 235 | ; match' := !match' + 4 236 | done 237 | ; while !scan < str_end && s.w.![!scan] == s.w.![!match'] do 238 | scan := !scan + 2 239 | ; match' := !match' + 2 240 | done 241 | ; while !scan < str_end && s.w.!{!scan} == s.w.!{!match'} do 242 | scan := !scan + 1 243 | ; match' := !match' + 1 244 | done 245 | ; if s.w.!{!scan} == s.w.!{!match'} then incr scan 246 | ; let len = _max_match - 1 - (str_end - !scan) in 247 | scan := str_end - (_max_match - 1) 248 | ; if len > !best_len then begin 249 | s.match_start <- !cur_match 250 | ; best_len := len 251 | ; if len >= cfg.nice_length then raise Break 252 | ; scan_end := s.w.![!scan + !best_len - 1] 253 | end 254 | ; cur_match := s.prev.(!cur_match land wmask) 255 | ; decr chain_length 256 | ; !cur_match > limit && !chain_length != 0 257 | end 258 | do 259 | () 260 | done 261 | with Break -> ()) 262 | ; if !best_len <= s.lookahead then !best_len else s.lookahead 263 | 264 | let eoi s = 265 | s.i <- bigstring_empty 266 | ; s.i_pos <- 0 267 | ; s.i_len <- min_int 268 | 269 | let src d s j l = 270 | if j < 0 || l < 0 || j + l > bigstring_length s then invalid_bounds j l 271 | ; if l == 0 then eoi d 272 | else ( 273 | d.i <- s 274 | ; d.i_pos <- j 275 | ; d.i_len <- j + l - 1) 276 | 277 | let i_rem s = s.i_len - s.i_pos + 1 [@@inline] 278 | let src_rem s = i_rem s 279 | let io_buffer_size = 16384 280 | 281 | let refill k s = 282 | match s.src with 283 | | `String _ -> eoi s ; k s.cfg s 284 | | `Channel ic -> 285 | let res = input_bigstring ic s.i 0 (bigstring_length s.i) in 286 | src s s.i 0 res ; k s.cfg s 287 | | `Manual -> 288 | s.k <- k 289 | ; `Await 290 | 291 | let memcpy src ~src_off dst ~dst_off ~len = 292 | let len0 = len land 3 in 293 | let len1 = len asr 2 in 294 | for i = 0 to len1 - 1 do 295 | let i = i * 4 in 296 | let v = unsafe_get_uint32 src (src_off + i) in 297 | unsafe_set_uint32 dst (dst_off + i) v 298 | done 299 | ; for i = 0 to len0 - 1 do 300 | let i = (len1 * 4) + i in 301 | let v = unsafe_get_uint8 src (src_off + i) in 302 | unsafe_set_uint8 dst (dst_off + i) v 303 | done 304 | 305 | let update_crc s len = 306 | s.crc <- Checkseum.Adler32.digest_bigstring s.i s.i_pos len s.crc 307 | 308 | let insert_string s str = 309 | let wsize = 1 lsl s.wbits in 310 | let wmask = wsize - 1 in 311 | s.hash <- update_hash s.hash s.w.!{str + (_min_match - 1)} 312 | ; let res = s.head.(s.hash) in 313 | s.prev.(str land wmask) <- res 314 | ; s.head.(s.hash) <- str 315 | ; res 316 | 317 | let succ_length literals length = 318 | assert (length >= 3 && length <= 255 + 3) 319 | ; literals.(256 + 1 + _length.(length - 3)) <- 320 | literals.(256 + 1 + _length.(length - 3)) + 1 321 | 322 | let succ_distance distances distance = 323 | assert (distance >= 1 && distance <= 32767 + 1) 324 | ; distances.(_distance (pred distance)) <- 325 | distances.(_distance (pred distance)) + 1 326 | 327 | let emit_match s ~off ~len = 328 | De.Queue.push_exn s.q (De.Queue.cmd (`Copy (off, len))) 329 | ; succ_length (s.l :> int array) len 330 | ; succ_distance (s.d :> int array) off 331 | ; if De.Queue.available s.q = 1 then ( 332 | De.Queue.push_exn s.q De.Queue.eob 333 | ; true) 334 | else false 335 | 336 | let succ_literal literals chr = 337 | literals.(Char.code chr) <- literals.(Char.code chr) + 1 338 | 339 | let emit_literal s chr = 340 | De.Queue.push_exn s.q (De.Queue.cmd (`Literal chr)) 341 | ; succ_literal (s.l :> int array) chr 342 | ; if De.Queue.available s.q = 1 then ( 343 | De.Queue.push_exn s.q De.Queue.eob 344 | ; true) 345 | else false 346 | 347 | (* XXX(dinosaure): it's possible that it remains one literal. *) 348 | let trailing s = 349 | if s.match_available then ( 350 | let _ = emit_literal s (unsafe_get_char s.w (s.strstart - 1)) in 351 | s.insert <- 352 | (if s.strstart < _min_match - 1 then s.strstart else _min_match - 1) 353 | ; `End) 354 | else `End 355 | 356 | let slide_hash s = 357 | let wsize = 1 lsl s.wbits in 358 | let m = ref 0 in 359 | let n = ref _hash_size in 360 | let p = ref !n in 361 | while 362 | decr p 363 | ; m := s.head.(!p) 364 | ; s.head.(!p) <- (if !m >= wsize then !m - wsize else 0) 365 | ; decr n 366 | ; !n != 0 367 | do 368 | () 369 | done 370 | ; n := wsize 371 | ; p := !n 372 | ; while 373 | decr p 374 | ; m := s.prev.(!p) 375 | ; s.prev.(!p) <- (if !m >= wsize then !m - wsize else 0) 376 | ; decr n 377 | ; !n != 0 378 | do 379 | () 380 | done 381 | 382 | let rec fill_window (cfg : configuration) s = 383 | let wsize = 1 lsl s.wbits in 384 | let wmask = wsize - 1 in 385 | let more = (wsize * 2) - s.lookahead - s.strstart in 386 | (* max *) 387 | let more = 388 | if s.strstart >= wsize + max_dist s then begin 389 | memcpy s.w ~src_off:wsize s.w ~dst_off:0 ~len:(wsize - more) 390 | ; s.match_start <- s.match_start - wsize 391 | ; s.strstart <- s.strstart - wsize 392 | ; slide_hash s 393 | ; more + wsize 394 | end 395 | else more in 396 | let rem = i_rem s in 397 | if rem <= 0 (* if (s->strm->avail_in == 0) break; *) then 398 | if rem < 0 then if s.lookahead > 0 then deflate_slow cfg s else trailing s 399 | else refill fill_window s 400 | else 401 | try 402 | let len = min more rem in 403 | memcpy s.i ~src_off:s.i_pos s.w ~dst_off:(s.strstart + s.lookahead) ~len 404 | ; (*d*) update_crc s len 405 | ; s.lookahead <- s.lookahead + len 406 | ; (*d*) s.i_pos <- s.i_pos + len 407 | ; if s.lookahead + s.insert >= _min_match then begin 408 | let str = ref (s.strstart - s.insert) in 409 | let insert = ref s.insert in 410 | s.hash <- s.w.!{!str} 411 | ; s.hash <- update_hash s.hash s.w.!{!str + 1} 412 | ; while s.lookahead + !insert >= _min_match && !insert != 0 do 413 | s.hash <- update_hash s.hash s.w.!{!str + _min_match - 1} 414 | ; s.prev.(!str land wmask) <- s.head.(s.hash) 415 | ; s.head.(s.hash) <- !str 416 | ; incr str 417 | ; decr insert 418 | ; if s.lookahead + !insert < _min_match then ( 419 | s.insert <- !insert 420 | ; raise Break) 421 | done 422 | ; s.insert <- !insert 423 | end 424 | ; if s.lookahead < _min_lookahead && i_rem s >= 0 then 425 | refill fill_window s 426 | else deflate_slow cfg s 427 | with Break -> deflate_slow cfg s 428 | 429 | and enough cfg s = 430 | if s.lookahead < _min_lookahead then fill_window cfg s else deflate_slow cfg s 431 | 432 | and deflate_slow cfg s = 433 | let hash_head = ref 0 in 434 | if s.lookahead >= _min_match then hash_head := insert_string s s.strstart 435 | ; s.prev_length <- s.match_length 436 | ; s.prev_match <- s.match_start 437 | ; s.match_length <- _min_match - 1 438 | ; (if 439 | !hash_head != 0 440 | && s.prev_length < cfg.max_lazy 441 | && s.strstart - !hash_head <= max_dist s 442 | then 443 | let match_length = longest_match cfg s !hash_head in 444 | if 445 | match_length <= 5 446 | && match_length == _min_match 447 | && s.strstart - s.match_start > _too_far 448 | then s.match_length <- _min_match - 1 449 | else s.match_length <- match_length) 450 | ; if s.prev_length >= _min_match && s.match_length <= s.prev_length then begin 451 | let max_insert = s.strstart + s.lookahead - _min_match in 452 | let flush = 453 | emit_match s ~off:(s.strstart - 1 - s.prev_match) ~len:s.prev_length 454 | in 455 | s.lookahead <- s.lookahead - (s.prev_length - 1) 456 | ; s.prev_length <- s.prev_length - 2 457 | ; while 458 | s.strstart <- s.strstart + 1 459 | ; if s.strstart <= max_insert then 460 | hash_head := insert_string s s.strstart 461 | ; s.prev_length <- s.prev_length - 1 462 | ; s.prev_length <> 0 463 | do 464 | () 465 | done 466 | ; s.match_available <- false 467 | ; s.match_length <- _min_match - 1 468 | ; s.strstart <- s.strstart + 1 469 | ; if flush then ( 470 | s.k <- enough 471 | ; `Flush) 472 | else enough cfg s 473 | end 474 | else if s.match_available then begin 475 | match emit_literal s (unsafe_get_char s.w (s.strstart - 1)) with 476 | | true -> 477 | s.strstart <- s.strstart + 1 478 | ; s.lookahead <- s.lookahead - 1 479 | ; s.k <- enough 480 | ; `Flush 481 | | false -> 482 | s.strstart <- s.strstart + 1 483 | ; s.lookahead <- s.lookahead - 1 484 | ; enough cfg s 485 | end 486 | else begin 487 | s.match_available <- true 488 | ; s.strstart <- s.strstart + 1 489 | ; s.lookahead <- s.lookahead - 1 490 | ; enough cfg s 491 | end 492 | 493 | let _literals = 256 494 | let _length_codes = 29 495 | let _l_codes = _literals + 1 + _length_codes 496 | let _d_codes = 30 497 | let checksum {crc; _} = crc 498 | let distances {d; _} = d 499 | let literals {l; _} = l 500 | 501 | let ctz x = 502 | let n = ref 0 and x = ref x and y = ref 0 in 503 | if Sys.word_size = 64 then ( 504 | n := 63 505 | ; y := !x lsl 32 506 | ; if !y != 0 then ( 507 | n := !n - 32 508 | ; x := !y)) 509 | else n := 31 510 | ; y := !x lsl 16 511 | ; if !y != 0 then ( 512 | n := !n - 16 513 | ; x := !y) 514 | ; y := !x lsl 8 515 | ; if !y != 0 then ( 516 | n := !n - 8 517 | ; x := !y) 518 | ; y := !x lsl 4 519 | ; if !y != 0 then ( 520 | n := !n - 4 521 | ; x := !y) 522 | ; y := !x lsl 2 523 | ; if !y != 0 then ( 524 | n := !n - 2 525 | ; x := !y) 526 | ; y := !x lsl 1 527 | ; if !y != 0 then n := !n - 1 528 | ; !n 529 | 530 | let state ?(level = 4) ~q ~w src = 531 | let wbits = ctz (bigstring_length w / 2) - 1 in 532 | let wsize = 1 lsl wbits in 533 | let cfg = 534 | match level with 535 | | 0 | 1 | 2 | 3 | 4 -> _4 536 | | 5 -> _5 537 | | 6 -> _6 538 | | 7 -> _7 539 | | 8 -> _8 540 | | 9 -> _9 541 | | _ -> invalid_arg "Invalid compression level: %d" level in 542 | let i, i_pos, i_len = 543 | match src with 544 | | `Manual -> bigstring_empty, 1, 0 545 | | `String x -> bigstring_of_string x, 0, String.length x - 1 546 | | `Channel _ -> bigstring_create io_buffer_size, 1, 0 in 547 | { 548 | src 549 | ; i 550 | ; i_pos 551 | ; i_len 552 | ; cfg 553 | ; l= De.make_literals () 554 | ; d= De.make_distances () 555 | ; w 556 | ; wbits 557 | ; lookahead= 0 558 | ; strstart= 0 559 | ; prev= Array.make wsize 0 560 | ; head= Array.make _hash_size 0 561 | ; hash= 0 562 | ; match_start= 0 563 | ; match_length= _min_match - 1 564 | ; match_available= false 565 | ; insert= 0 566 | ; prev_length= _min_match - 1 567 | ; prev_match= 0 568 | ; q 569 | ; crc= Checkseum.Adler32.default 570 | ; k= enough 571 | } 572 | 573 | let compress s = s.k s.cfg s 574 | 575 | type window = bigstring 576 | 577 | let make_window ~bits = bigstring_create ((1 lsl bits) * 2) 578 | -------------------------------------------------------------------------------- /lib/lz.mli: -------------------------------------------------------------------------------- 1 | type bigstring = 2 | (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t 3 | 4 | type optint = Optint.t 5 | type src = [ `Channel of in_channel | `String of string | `Manual ] 6 | type decode = [ `Flush | `Await | `End ] 7 | type state 8 | type literals = De.literals 9 | type distances = De.distances 10 | type window 11 | 12 | val literals : state -> literals 13 | val distances : state -> distances 14 | val checksum : state -> optint 15 | val src : state -> bigstring -> int -> int -> unit 16 | val src_rem : state -> int 17 | val make_window : bits:int -> window 18 | val compress : state -> decode 19 | val state : ?level:int -> q:De.Queue.t -> w:window -> src -> state 20 | -------------------------------------------------------------------------------- /lib/lzo.mli: -------------------------------------------------------------------------------- 1 | type bigstring = 2 | (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t 3 | 4 | type error = 5 | [ `Malformed of string | `Invalid_argument of string | `Invalid_dictionary ] 6 | 7 | val pp_error : Format.formatter -> error -> unit 8 | 9 | val uncompress : bigstring -> bigstring -> (bigstring, [> error ]) result 10 | (** [uncompress input output] returns a {i sub-layout} of [output] which is the 11 | inflated contents of [input]. Otherwise, it returns: 12 | 13 | {ul 14 | {- [`Malformed] if the [input] is not recognized as a LZO contents.} 15 | {- [`Invalid_argument] if [output] is not large enough to contain inflated contents.} 16 | {- [`Invalid_dictionary] if an {i op-code} of [input] refers to 17 | an unbound location.}} *) 18 | 19 | val uncompress_with_buffer : 20 | ?chunk:int -> bigstring -> (string, [> error ]) result 21 | (** [uncompress ?chunk input] returns a fresh-allocated [string] which is the 22 | inflated contents of [input]. An internal {!Buffer.t} is used and it can be 23 | initialized with [chunk] (default to [0x1000]). Otherwise, it returns same errors 24 | as [uncompress]. *) 25 | 26 | type wrkmem 27 | (** Type of an internal {i hash-table} used by {!compress}. *) 28 | 29 | val make_wrkmem : unit -> wrkmem 30 | (** [make_wrkmem ()] returns a fresh-allocated {!wrkmem}. *) 31 | 32 | val compress : bigstring -> bigstring -> wrkmem -> int 33 | (** [compress input output wrkmem] deflates [input] and produces a LZO contents 34 | into [output]. It uses [wrkmem] to do the deflation. It returns the number 35 | of bytes wrotes into [output] such as: 36 | 37 | {[ 38 | let len = compress input output wrkmem in 39 | Bigarray.Array1.sub output 0 len 40 | ]} 41 | 42 | is the deflated contents of [input]. 43 | 44 | @raise Invalid_argument if [output] is not large enough to contain the 45 | deflated contents. *) 46 | -------------------------------------------------------------------------------- /lib/zl.ml: -------------------------------------------------------------------------------- 1 | let io_buffer_size = 65536 2 | let kstrf k fmt = Format.kasprintf k fmt 3 | let invalid_arg fmt = Format.kasprintf invalid_arg fmt 4 | 5 | type bigstring = 6 | (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t 7 | 8 | type window = De.window 9 | 10 | let bigstring_create l = 11 | Bigarray.Array1.create Bigarray.char Bigarray.c_layout l 12 | 13 | let bigstring_empty = Bigarray.Array1.create Bigarray.char Bigarray.c_layout 0 14 | let bigstring_length x = Bigarray.Array1.dim x [@@inline] 15 | let bigstring_sub x = Bigarray.Array1.sub x [@@inline] 16 | 17 | external unsafe_get_uint8 : bigstring -> int -> int = "%caml_ba_ref_1" 18 | external unsafe_get_uint16 : bigstring -> int -> int = "%caml_bigstring_get16" 19 | external unsafe_get_uint32 : bigstring -> int -> int32 = "%caml_bigstring_get32" 20 | external unsafe_set_uint8 : bigstring -> int -> int -> unit = "%caml_ba_set_1" 21 | external swap16 : int -> int = "%bswap16" 22 | 23 | let _unsafe_get_uint16_be v i = 24 | if Sys.big_endian then unsafe_get_uint16 v i 25 | else swap16 (unsafe_get_uint16 v i) 26 | 27 | let unsafe_get_uint16_le v i = 28 | if Sys.big_endian then swap16 (unsafe_get_uint16 v i) 29 | else unsafe_get_uint16 v i 30 | 31 | external unsafe_set_uint16 : bigstring -> int -> int -> unit 32 | = "%caml_bigstring_set16" 33 | 34 | external unsafe_set_uint32 : bigstring -> int -> int32 -> unit 35 | = "%caml_bigstring_set32" 36 | 37 | external swap16 : int -> int = "%bswap16" 38 | external swap32 : int32 -> int32 = "caml_int32_bswap" 39 | 40 | let string_unsafe_get_uint8 : string -> int -> int = 41 | fun buf off -> Char.code buf.[off] 42 | 43 | external string_unsafe_get_uint32 : string -> int -> int32 44 | = "%caml_string_get32" 45 | 46 | let bytes_unsafe_get_uint8 : bytes -> int -> int = 47 | fun buf off -> Char.code (Bytes.get buf off) 48 | 49 | external bytes_unsafe_get_uint32 : bytes -> int -> int32 = "%caml_bytes_get32" 50 | 51 | let bytes_unsafe_set_uint8 : bytes -> int -> int -> unit = 52 | fun buf off v -> Bytes.set buf off (Char.unsafe_chr (v land 0xff)) 53 | 54 | external bytes_unsafe_set_uint32 : bytes -> int -> int32 -> unit 55 | = "%caml_bytes_set32" 56 | 57 | let input_bigstring ic buf off len = 58 | let tmp = Bytes.create len in 59 | let res = input ic tmp 0 len in 60 | 61 | let len0 = res land 3 in 62 | let len1 = res asr 2 in 63 | 64 | for i = 0 to len1 - 1 do 65 | let i = i * 4 in 66 | let v = bytes_unsafe_get_uint32 tmp i in 67 | unsafe_set_uint32 buf (off + i) v 68 | done 69 | 70 | ; for i = 0 to len0 - 1 do 71 | let i = (len1 * 4) + i in 72 | let v = bytes_unsafe_get_uint8 tmp i in 73 | unsafe_set_uint8 buf (off + i) v 74 | done 75 | ; res 76 | 77 | let bigstring_to_string v = 78 | let len = bigstring_length v in 79 | let res = Bytes.create len in 80 | let len0 = len land 3 in 81 | let len1 = len asr 2 in 82 | 83 | for i = 0 to len1 - 1 do 84 | let i = i * 4 in 85 | let v = unsafe_get_uint32 v i in 86 | bytes_unsafe_set_uint32 res i v 87 | done 88 | 89 | ; for i = 0 to len0 - 1 do 90 | let i = (len1 * 4) + i in 91 | let v = unsafe_get_uint8 v i in 92 | bytes_unsafe_set_uint8 res i v 93 | done 94 | 95 | ; Bytes.unsafe_to_string res 96 | 97 | let output_bigstring oc buf off len = 98 | (* XXX(dinosaure): stupidly slow! *) 99 | let v = Bigarray.Array1.sub buf off len in 100 | let v = bigstring_to_string v in 101 | output_string oc v 102 | 103 | let bigstring_of_string v = 104 | let len = String.length v in 105 | let res = bigstring_create len in 106 | let len0 = len land 3 in 107 | let len1 = len asr 2 in 108 | 109 | for i = 0 to len1 - 1 do 110 | let i = i * 4 in 111 | let v = string_unsafe_get_uint32 v i in 112 | unsafe_set_uint32 res i v 113 | done 114 | 115 | ; for i = 0 to len0 - 1 do 116 | let i = (len1 * 4) + i in 117 | let v = string_unsafe_get_uint8 v i in 118 | unsafe_set_uint8 res i v 119 | done 120 | ; res 121 | 122 | let unsafe_get_uint32_be = 123 | if Sys.big_endian then fun buf off -> unsafe_get_uint32 buf off 124 | else fun buf off -> swap32 (unsafe_get_uint32 buf off) 125 | 126 | let unsafe_set_uint32_be = 127 | if Sys.big_endian then fun buf off v -> unsafe_set_uint32 buf off v 128 | else fun buf off v -> unsafe_set_uint32 buf off (swap32 v) 129 | 130 | let unsafe_set_uint16_be = 131 | if Sys.big_endian then fun buf off v -> unsafe_set_uint16 buf off v 132 | else fun buf off v -> unsafe_set_uint16 buf off (swap16 v) 133 | 134 | let _unsafe_set_uint16_le = 135 | if Sys.big_endian then fun buf off v -> unsafe_set_uint16 buf off (swap16 v) 136 | else fun buf off v -> unsafe_set_uint16 buf off v 137 | 138 | let invalid_bounds off len = 139 | invalid_arg "Out of bounds (off: %d, len: %d)" off len 140 | 141 | let _deflated = 8 (* Compression method *) 142 | 143 | module Inf = struct 144 | type src = [ `Channel of in_channel | `String of string | `Manual ] 145 | 146 | (* XXX(dinosaure): immutable style. *) 147 | type decoder = { 148 | src: De.Inf.src 149 | ; i: bigstring 150 | ; i_pos: int 151 | ; i_len: int 152 | ; f: bool 153 | ; wr: int 154 | ; hd: int 155 | ; dd: dd 156 | ; fdict: bool 157 | ; flevel: int 158 | ; cinfo: int 159 | ; allocate: int -> De.window 160 | ; t: bigstring 161 | ; t_need: int 162 | ; t_len: int 163 | ; k: decoder -> signal 164 | } 165 | 166 | and dd = 167 | | Dd of {state: De.Inf.decoder; window: De.window; o: De.bigstring} 168 | | Hd of {o: De.bigstring} 169 | 170 | and signal = 171 | [ `Await of decoder 172 | | `Flush of decoder 173 | | `End of decoder 174 | | `Malformed of string ] 175 | 176 | let malformedf fmt = kstrf (fun s -> `Malformed s) fmt 177 | let err_unexpected_end_of_input _ = malformedf "Unexpected end of input" 178 | 179 | let err_invalid_checksum has expect _ = 180 | malformedf "Invalid checksum (expect:%04lx, has:%04lx)" expect 181 | (Optint.to_unsigned_int32 has) 182 | 183 | let err_invalid_header _ = malformedf "Invalid Zlib header" 184 | 185 | (* remaining bytes to read [d.i] *) 186 | let i_rem d = d.i_len - d.i_pos + 1 [@@inline] 187 | 188 | (* End of input [eoi] is signalled by [d.i_pos = 0] and [d.i_len = min_int] 189 | which implies [i_rem d < 0] is [true]. *) 190 | 191 | let eoi d = {d with i= bigstring_empty; i_pos= 0; i_len= min_int} 192 | 193 | let refill k d = 194 | match d.dd, d.src with 195 | | Dd {state; _}, `String _ -> 196 | De.Inf.src state bigstring_empty 0 0 197 | ; k (eoi d) 198 | | Dd {state; _}, `Channel ic -> 199 | let res = input_bigstring ic d.i 0 (bigstring_length d.i) in 200 | De.Inf.src state d.i 0 res ; k d 201 | | (Dd _ | Hd _), `Manual -> `Await {d with k} 202 | | Hd _, `String _ -> k (eoi d) 203 | | Hd _, `Channel ic -> 204 | let res = input_bigstring ic d.i 0 (bigstring_length d.i) in 205 | if res == 0 then k (eoi d) else k {d with i_pos= 0; i_len= res - 1} 206 | 207 | let flush k d = `Flush {d with k} 208 | 209 | let blit src ~src_off dst ~dst_off ~len = 210 | let a = Bigarray.Array1.sub src src_off len in 211 | let b = Bigarray.Array1.sub dst dst_off len in 212 | Bigarray.Array1.blit a b 213 | 214 | let rec t_fill k d = 215 | let blit d len = 216 | blit d.i ~src_off:d.i_pos d.t ~dst_off:d.t_len ~len 217 | ; {d with i_pos= d.i_pos + len; t_len= d.t_len + len} in 218 | let rem = i_rem d in 219 | if rem < 0 then malformedf "Unexpected end of input" 220 | else 221 | let need = d.t_need - d.t_len in 222 | if rem < need then 223 | let d = blit d rem in 224 | refill (t_fill k) d 225 | else 226 | let d = blit d need in 227 | k {d with t_need= 0} 228 | 229 | let t_need n d = {d with t_need= n} 230 | 231 | let checksum d = 232 | let k d = 233 | match d.dd with 234 | | Dd {state; _} -> 235 | let a = De.Inf.checksum state in 236 | let b = unsafe_get_uint32_be d.t 0 in 237 | 238 | if 239 | Optint.to_unsigned_int32 a = b 240 | (* FIXME: Optint.equal a (Optint.of_int32 b) bugs! *) 241 | then `End d 242 | else err_invalid_checksum a b d 243 | | Hd _ -> assert false in 244 | 245 | t_fill k (t_need 4 d) 246 | 247 | let rec header d = 248 | let k d = 249 | let[@warning "-8"] (Hd {o}) = d.dd in 250 | let cmf = unsafe_get_uint16_le d.t 0 in 251 | let cm = cmf land 0b1111 in 252 | let cinfo = (cmf lsr 4) land 0b1111 in 253 | let flg = cmf lsr 8 in 254 | let fdict = (flg lsr 5) land 0b1 in 255 | let flevel = (flg lsr 6) land 0b11 in 256 | let window = d.allocate (cinfo + 8) in 257 | let state = De.Inf.decoder `Manual ~o ~w:window in 258 | let dd = Dd {state; window; o} in 259 | if (((cmf land 0xff) lsl 8) + (cmf lsr 8)) mod 31 != 0 || cm != _deflated 260 | then err_invalid_header d 261 | else ( 262 | if i_rem d > 0 then De.Inf.src state d.i d.i_pos (i_rem d) 263 | ; decode 264 | { 265 | d with 266 | hd= unsafe_get_uint16_le d.t 0 267 | ; k= decode 268 | ; dd 269 | ; t_need= 0 270 | ; t_len= 0 271 | ; fdict= fdict == 1 272 | ; flevel 273 | ; cinfo 274 | }) in 275 | if i_rem d >= 2 then ( 276 | unsafe_set_uint16 d.t 0 (unsafe_get_uint16 d.i d.i_pos) 277 | ; k {d with i_pos= d.i_pos + 2}) 278 | else if i_rem d < 0 then err_unexpected_end_of_input d 279 | else if i_rem d == 0 then refill header d 280 | else t_fill k (t_need 2 d) 281 | 282 | and decode d = 283 | match d.dd with 284 | | Hd _ -> header d 285 | | Dd {state; o; _} -> ( 286 | match De.Inf.decode state with 287 | | `Flush -> 288 | if d.f then flush decode d 289 | else 290 | let len = bigstring_length o - De.Inf.dst_rem state in 291 | flush decode {d with wr= d.wr + len; f= true} 292 | | `Await -> 293 | let len = i_rem d - De.Inf.src_rem state in 294 | refill decode {d with i_pos= d.i_pos + len} 295 | | `End -> 296 | if d.f then flush decode d 297 | else 298 | let len = bigstring_length o - De.Inf.dst_rem state in 299 | if len > 0 then 300 | flush decode 301 | { 302 | d with 303 | i_pos= d.i_pos + (i_rem d - De.Inf.src_rem state) 304 | ; wr= d.wr + len 305 | ; f= true 306 | } 307 | else 308 | checksum {d with i_pos= d.i_pos + (i_rem d - De.Inf.src_rem state)} 309 | | `Malformed err -> `Malformed err) 310 | 311 | let src d s j l = 312 | if j < 0 || l < 0 || j + l > bigstring_length s then invalid_bounds j l 313 | ; let d = 314 | if l == 0 then eoi d else {d with i= s; i_pos= j; i_len= j + l - 1} 315 | in 316 | match d.dd with Dd {state; _} -> De.Inf.src state s j l ; d | Hd _ -> d 317 | 318 | let flush d = 319 | match d.dd with 320 | | Hd _ -> {d with f= false} 321 | | Dd {state; _} -> De.Inf.flush state ; {d with f= false} 322 | 323 | let dst_rem d = 324 | match d.dd with Hd _ -> 0 | Dd {state; _} -> De.Inf.dst_rem state 325 | 326 | let src_rem d = i_rem d 327 | let write {wr; _} = wr 328 | 329 | let decoder src ~o ~allocate = 330 | let i, i_pos, i_len = 331 | match src with 332 | | `Manual -> bigstring_empty, 1, 0 333 | | `String x -> bigstring_of_string x, 0, String.length x - 1 334 | | `Channel _ -> bigstring_create io_buffer_size, 1, 0 in 335 | { 336 | i 337 | ; i_pos 338 | ; i_len 339 | ; src 340 | ; f= false 341 | ; wr= 0 342 | ; hd= 0 343 | ; dd= Hd {o} 344 | ; fdict= false 345 | ; flevel= 2 346 | ; cinfo= 8 347 | ; allocate 348 | ; t= bigstring_create 4 349 | ; t_need= 0 350 | ; t_len= 0 351 | ; k= decode 352 | } 353 | 354 | let reset d = 355 | let i, i_pos, i_len = 356 | match d.src with 357 | | `Manual -> bigstring_empty, 1, 0 358 | | `String x -> bigstring_of_string x, 0, String.length x - 1 359 | | `Channel _ -> bigstring_create io_buffer_size, 1, 0 in 360 | let o = match d.dd with Hd {o} -> o | Dd {o; _} -> o in 361 | { 362 | i 363 | ; i_pos 364 | ; i_len 365 | ; src= d.src 366 | ; f= false 367 | ; wr= 0 368 | ; hd= 0 369 | ; dd= Hd {o} 370 | ; fdict= false 371 | ; flevel= 2 372 | ; cinfo= 8 373 | ; allocate= d.allocate 374 | ; t= d.t 375 | ; t_need= 0 376 | ; t_len= 0 377 | ; k= decode 378 | } 379 | 380 | let decode d = d.k d 381 | 382 | module Ns = struct 383 | type error = [ `Invalid_header | `Invalid_checksum | De.Inf.Ns.error ] 384 | 385 | let pp_error ppf e = 386 | match e with 387 | | `Invalid_header -> Format.fprintf ppf "Invalid header" 388 | | `Invalid_checksum -> Format.fprintf ppf "Invalid checksum" 389 | | #De.Inf.Ns.error as e -> De.Inf.Ns.pp_error ppf e 390 | 391 | let header src = 392 | let cmf = unsafe_get_uint16_le src 0 in 393 | let cm = cmf land 0b1111 in 394 | let _cinfo = (cmf lsr 4) land 0b1111 in 395 | let flg = cmf lsr 8 in 396 | let _fdict = (flg lsr 5) land 0b1 in 397 | let _flevel = (flg lsr 6) land 0b11 in 398 | (((cmf land 0xff) lsl 8) + (cmf lsr 8)) mod 31 != 0 || cm != _deflated 399 | 400 | let inflate src dst = 401 | let src_len = bigstring_length src in 402 | if src_len < 2 then Error `Unexpected_end_of_input 403 | else if header src then Error `Invalid_header 404 | else 405 | let sub_src = bigstring_sub src 2 (bigstring_length src - 6) in 406 | let res = De.Inf.Ns.inflate sub_src dst in 407 | match res with 408 | | Ok (i, o) -> 409 | if src_len < i + 6 then Error `Unexpected_end_of_input 410 | else 411 | let i_adl32 = unsafe_get_uint32_be src (i + 2) in 412 | let o_adl32 = 413 | Optint.to_int32 414 | Checkseum.Adler32.(unsafe_digest_bigstring dst 0 o default) 415 | in 416 | if i_adl32 <> o_adl32 then Error `Invalid_checksum else Ok (i + 6, o) 417 | | Error e -> Error (e : De.Inf.Ns.error :> [> error ]) 418 | end 419 | end 420 | 421 | module Def = struct 422 | type src = [ `Channel of in_channel | `String of string | `Manual ] 423 | type dst = [ `Channel of out_channel | `Buffer of Buffer.t | `Manual ] 424 | 425 | type encoder = { 426 | src: src 427 | ; dst: dst 428 | ; level: int 429 | ; dynamic: bool 430 | ; i: bigstring 431 | ; i_pos: int 432 | ; i_len: int 433 | ; o: bigstring 434 | ; o_pos: int 435 | ; o_len: int 436 | ; q: De.Queue.t 437 | ; s: De.Lz77.state 438 | ; e: De.Def.encoder 439 | ; w: De.Lz77.window 440 | ; state: state 441 | ; k: encoder -> [ `Await of encoder | `Flush of encoder | `End of encoder ] 442 | } 443 | 444 | and state = 445 | | Hd 446 | (* header process *) 447 | | Dd 448 | 449 | (* DEFLATE process *) 450 | 451 | type ret = [ `Await of encoder | `End of encoder | `Flush of encoder ] 452 | 453 | let o_rem e = e.o_len - e.o_pos + 1 454 | let i_rem s = s.i_len - s.i_pos + 1 455 | 456 | let eoi e = 457 | De.Lz77.src e.s bigstring_empty 0 0 458 | ; {e with i= bigstring_empty; i_pos= 0; i_len= min_int} 459 | 460 | let src e s j l = 461 | if j < 0 || l < 0 || j + l > bigstring_length s then invalid_bounds j l 462 | ; De.Lz77.src e.s s j l 463 | ; if l == 0 then eoi e else {e with i= s; i_pos= j; i_len= j + l - 1} 464 | 465 | let dst e s j l = 466 | if j < 0 || l < 0 || j + l > bigstring_length s then invalid_bounds j l 467 | ; (match e.state with Hd -> () | Dd -> De.Def.dst e.e s j l) 468 | ; {e with o= s; o_pos= j; o_len= j + l - 1} 469 | 470 | let refill k e = 471 | match e.src with 472 | | `String _ -> k (eoi e) 473 | | `Channel ic -> 474 | let res = input_bigstring ic e.i 0 (bigstring_length e.i) in 475 | k (src e e.i 0 res) 476 | | `Manual -> `Await {e with k} 477 | 478 | let flush k e = 479 | match e.dst with 480 | | `Buffer b -> 481 | let len = bigstring_length e.o - o_rem e in 482 | for i = 0 to len - 1 do 483 | Buffer.add_char b e.o.{i} 484 | done 485 | ; k (dst e e.o 0 (bigstring_length e.o)) 486 | | `Channel oc -> 487 | output_bigstring oc e.o 0 (bigstring_length e.o - o_rem e) 488 | ; k (dst e e.o 0 (bigstring_length e.o)) 489 | | `Manual -> `Flush {e with k} 490 | 491 | let identity e = `End e 492 | 493 | let rec checksum e = 494 | let k e = 495 | let checksum = Optint.to_int32 (De.Lz77.checksum e.s) in 496 | unsafe_set_uint32_be e.o e.o_pos checksum 497 | ; flush identity {e with o_pos= e.o_pos + 4} in 498 | if o_rem e >= 4 then k e else flush checksum e 499 | 500 | let make_block ?(last = false) e = 501 | if De.Lz77.no_compression e.s then {De.Def.kind= De.Def.Flat; last} 502 | else if last = false && e.dynamic then 503 | let literals = De.Lz77.literals e.s in 504 | let distances = De.Lz77.distances e.s in 505 | let dynamic = De.Def.dynamic_of_frequencies ~literals ~distances in 506 | {De.Def.kind= De.Def.Dynamic dynamic; last} 507 | else {De.Def.kind= De.Def.Fixed; last} 508 | 509 | let rec encode e = 510 | match e.state with 511 | | Hd -> 512 | let k e = 513 | let window_bits = 15 in 514 | let header = (_deflated + ((window_bits - 8) lsl 4)) lsl 8 in 515 | let header = header lor (e.level lsl 6) in 516 | let header = header + (31 - (header mod 31)) in 517 | unsafe_set_uint16_be e.o e.o_pos header 518 | ; if i_rem e > 0 then De.Lz77.src e.s e.i e.i_pos (i_rem e) 519 | ; (* XXX(dinosaure): we need to protect [e.s] against EOI signal. *) 520 | De.Def.dst e.e e.o (e.o_pos + 2) (o_rem e - 2) 521 | ; encode {e with state= Dd; o_pos= e.o_pos + 2} in 522 | if o_rem e >= 2 then k e else flush encode e 523 | | Dd -> 524 | let rec partial k e = k e (De.Def.encode e.e `Await) 525 | and compress e = 526 | match De.Lz77.compress e.s with 527 | | `Await -> 528 | refill compress 529 | {e with i_pos= e.i_pos + (i_rem e - De.Lz77.src_rem e.s)} 530 | | `Flush -> encode_deflate e (De.Def.encode e.e `Flush) 531 | | `End -> 532 | let block = make_block ~last:true e in 533 | trailing e (De.Def.encode e.e (`Block block)) 534 | and encode_deflate e = function 535 | | `Partial -> 536 | let len = o_rem e - De.Def.dst_rem e.e in 537 | flush (partial encode_deflate) {e with o_pos= e.o_pos + len} 538 | | `Ok -> compress e 539 | | `Block -> 540 | let block = make_block e in 541 | encode_deflate e (De.Def.encode e.e (`Block block)) 542 | and trailing e = function 543 | | `Partial -> 544 | let len = o_rem e - De.Def.dst_rem e.e in 545 | flush (partial trailing) {e with o_pos= e.o_pos + len} 546 | | `Ok -> 547 | let len = o_rem e - De.Def.dst_rem e.e in 548 | checksum {e with o_pos= e.o_pos + len} 549 | | `Block -> assert false 550 | (* XXX(dinosaure): should never occur! *) in 551 | 552 | compress e 553 | 554 | let src_rem = i_rem 555 | let dst_rem = o_rem 556 | 557 | let encoder ?(dynamic = true) ~q ~w ~level src dst = 558 | let i, i_pos, i_len = 559 | match src with 560 | | `Manual -> bigstring_empty, 1, 0 561 | | `String x -> bigstring_of_string x, 0, String.length x - 1 562 | | `Channel _ -> bigstring_create io_buffer_size, 1, 0 in 563 | let o, o_pos, o_len = 564 | match dst with 565 | | `Manual -> bigstring_empty, 1, 0 566 | | `Buffer _ | `Channel _ -> 567 | bigstring_create io_buffer_size, 0, io_buffer_size - 1 in 568 | { 569 | src 570 | ; dst 571 | ; i 572 | ; i_pos 573 | ; i_len 574 | ; o 575 | ; o_pos 576 | ; o_len 577 | ; level= 578 | (match level with 0 -> 0 | 1 | 2 | 3 | 4 | 5 -> 1 | 6 -> 2 | _ -> 3) 579 | ; dynamic 580 | ; e= De.Def.encoder `Manual ~q 581 | ; s= De.Lz77.state ~level `Manual ~q ~w 582 | ; q 583 | ; w 584 | ; state= Hd 585 | ; k= encode 586 | } 587 | 588 | let encode e = e.k e 589 | 590 | module Ns = struct 591 | type error = De.Def.Ns.error 592 | 593 | let pp_error ppf e = 594 | match e with #De.Def.Ns.error as e -> De.Def.Ns.pp_error ppf e 595 | 596 | let compress_bound len = De.Def.Ns.compress_bound len + 6 597 | 598 | let header dst level = 599 | let window_bits = 15 in 600 | let header = (_deflated + ((window_bits - 8) lsl 4)) lsl 8 in 601 | let level = 602 | match level with 0 | 1 -> 0 | 2 | 3 | 4 | 5 -> 1 | 6 -> 2 | _ -> 3 in 603 | let header = header lor (level lsl 6) in 604 | let header = header + (31 - (header mod 31)) in 605 | unsafe_set_uint16_be dst 0 header 606 | 607 | let deflate ?(level = 4) src dst = 608 | if bigstring_length dst < 2 then Error `Unexpected_end_of_output 609 | else begin 610 | header dst level 611 | ; let sub_dst = bigstring_sub dst 2 (bigstring_length dst - 2) in 612 | let res = De.Def.Ns.deflate ~level src sub_dst in 613 | match res with 614 | | Ok res -> 615 | let adl32 = 616 | Checkseum.Adler32.( 617 | unsafe_digest_bigstring src 0 (bigstring_length src) default) 618 | in 619 | if bigstring_length sub_dst - res < 2 then 620 | Error `Unexpected_end_of_output 621 | else ( 622 | unsafe_set_uint32_be sub_dst res (Optint.to_int32 adl32) 623 | ; Ok (res + 6)) 624 | | Error e -> Error (e : De.Def.Ns.error :> [> error ]) 625 | end 626 | end 627 | end 628 | 629 | module Higher = struct 630 | let compress ?(level = 6) ?dynamic ~w ~q ~refill ~flush i o = 631 | let encoder = Def.encoder `Manual `Manual ?dynamic ~q ~w ~level in 632 | let rec go encoder = 633 | match Def.encode encoder with 634 | | `Await encoder -> 635 | let len = refill i in 636 | go (Def.src encoder i 0 len) 637 | | `Flush encoder -> 638 | let len = bigstring_length o - Def.dst_rem encoder in 639 | flush o len 640 | ; go (Def.dst encoder o 0 (bigstring_length o)) 641 | | `End encoder -> 642 | let len = bigstring_length o - Def.dst_rem encoder in 643 | if len > 0 then flush o len in 644 | go (Def.dst encoder o 0 (bigstring_length o)) 645 | 646 | let uncompress ~allocate ~refill ~flush i o = 647 | let decoder = Inf.decoder `Manual ~allocate ~o in 648 | let rec go decoder = 649 | match Inf.decode decoder with 650 | | `Await decoder -> 651 | let len = refill i in 652 | go (Inf.src decoder i 0 len) 653 | | `Flush decoder -> 654 | let len = bigstring_length o - Inf.dst_rem decoder in 655 | flush o len 656 | ; go (Inf.flush decoder) 657 | | `End decoder -> 658 | let len = bigstring_length o - Inf.dst_rem decoder in 659 | if len > 0 then flush o len 660 | ; Ok () 661 | | `Malformed err -> Error (`Msg err) in 662 | go decoder 663 | end 664 | -------------------------------------------------------------------------------- /lib/zl.mli: -------------------------------------------------------------------------------- 1 | (** {1 ZLIB layer.} 2 | 3 | ZLIB is a standard on top of RFC1951. It uses the {!De} implementation with 4 | the LZ77 compression algorithm. Module provides non-blocking streaming codec 5 | to {{:#decode}decode} and {{:#encode}encode} ZLIB encoding. It can 6 | efficiently work payload by payload without blocking IO. *) 7 | 8 | type bigstring = 9 | (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t 10 | (** The type for [bigstring]. *) 11 | 12 | type window = De.window 13 | (** The type for sliding windows. *) 14 | 15 | val io_buffer_size : int 16 | 17 | (** {2:decode ZLIB Decoder.} 18 | 19 | Unlike [de], [zl] provides a referentially transparent {!Inf.decoder}. The 20 | client must use a {!Inf.decoder} given {b by} {!Inf.decode} instead of a 21 | [decoder] given {b to} {!Inf.decode}. A common use of [zl] is: 22 | 23 | {[ 24 | let rec go d0 = match Inf.decode d0 with 25 | | `Await d1 -> ... go d1 26 | | `Flush d1 -> ... go d1 27 | | _ -> ... in 28 | ]} *) 29 | 30 | module Inf : sig 31 | type decoder 32 | (** The type for decoders. *) 33 | 34 | type src = [ `Channel of in_channel | `String of string | `Manual ] 35 | (** The type for input sources. With a [`Manual] source the client must 36 | provide input with {!src}. With [`String] or [`Channel] source the client 37 | can safely discard [`Await] case (with [assert false]). *) 38 | 39 | type signal = 40 | [ `Await of decoder 41 | | `Flush of decoder 42 | | `End of decoder 43 | | `Malformed of string ] 44 | 45 | val decoder : src -> o:bigstring -> allocate:(int -> window) -> decoder 46 | (** [decoder src ~o ~allocate] is a decoder that inputs from [src]. 47 | 48 | {b Output buffer.} 49 | 50 | [zl], as [de], uses [o] buffer as internal buffer to store output. We 51 | recommend to allocate an {!io_buffer_size} buffer as output buffer. Then, 52 | {!dst_rem}[ decoder] tells you how many unused bytes remain in [o]. 53 | 54 | {b Window.} 55 | 56 | ZLIB has a header to specify the window size needed to inflate a given 57 | input. When [zl] knows that, it calls [allocate] with a number [bits] so 58 | that [1 lsl bits] is the size of the window. [bits] can not be larger than 15 nor 59 | lower than 8. [allocate] can be [fun bits -> De.make_window ~bits] or a 60 | previously allocated window. [decoder] will take the {i ownership} on it! 61 | 62 | Ownership in our case means that {!decode} will mutate it in-place and expect 63 | it to remain unchanged between invocations. *) 64 | 65 | val decode : decoder -> signal 66 | (** [decode d0] is: 67 | 68 | {ul 69 | {- [`Await d1] if [d0] has a [`Manual] input source and awaits for more 70 | input. The client must use a {!src} with [d1] to provide it.} 71 | {- [`Flush d1] if given output buffer [o] (see {!decoder}) needs to be 72 | drained before work can be resumed. The client must use {!flush} with [d1] 73 | to {b completely} flush [o]. Usually [o] will be full and consist fully of 74 | bytes that need to be copied from the buffer, but sometimes only the first 75 | part of the buffer is used. In those cases {!dst_rem} will give you the 76 | amount of free/unused bytes remain in [o]. These should {b not} be copied 77 | since their contents are not part of the output. Instead, the first 78 | [bigstring_length o - Inf.dst_rem d1] bytes should be copied when flushing 79 | [o].} 80 | {- [`Malformed err] if given input is malformed. [err] is a human-readable 81 | error message.} 82 | {- [`End d1] if given input notify end of flow. [o] is possibly not empty 83 | (it can be check with {!dst_rem}).}} *) 84 | 85 | val reset : decoder -> decoder 86 | (** [reset d] is a [d] in its original state, as it was initialized by 87 | {!decoder}. *) 88 | 89 | val src : decoder -> bigstring -> int -> int -> decoder 90 | (** [src d s j l] provides [d] with [l] bytes to read, starting at [j] in [s]. 91 | This byte range is read by calls to {!decode} with [d] until [`Await] is 92 | returned. To signal the end of input call the function with [l = 0]. 93 | 94 | @raise Invalid_argument when [j] and [l] do not correspond to a valid 95 | range. *) 96 | 97 | val dst_rem : decoder -> int 98 | (** [dst_rem d] is how many unused bytes remain in the output buffer of [d]. *) 99 | 100 | val src_rem : decoder -> int 101 | (** [src_rem d] is how many unprocessed bytes remain in the input buffer of 102 | [d]. *) 103 | 104 | val write : decoder -> int 105 | (** [write d] is how many bytes [d] emitted since it was created. *) 106 | 107 | val flush : decoder -> decoder 108 | (** [flush d] is a decoder where internal output buffer [o] is {b completely} 109 | free to store bytes. *) 110 | 111 | module Ns : sig 112 | (** A non-streamable implementation of the RFC 1950. It considers the input 113 | to be whole and is therefore able to save some time *) 114 | 115 | type error = [ `Invalid_header | `Invalid_checksum | De.Inf.Ns.error ] 116 | (** The type for inflation errors. *) 117 | 118 | val pp_error : Format.formatter -> error -> unit 119 | (** Pretty-printer of {!error}. *) 120 | 121 | val inflate : bigstring -> bigstring -> (int * int, [> error ]) result 122 | (** [inflate src dst] inflates the content of [src] into [dst]. 123 | 124 | In case of success, it returns the bytes read and bytes writen in an 125 | [Ok] result. In case of failure, it returns the error in a [Error] 126 | result. *) 127 | end 128 | end 129 | 130 | (** {2:encode ZLIB Encoder.} 131 | 132 | ZLIB encoder is glue between the LZ77 algorithm and the DEFLATE encoder, 133 | prefixed with a ZLIB header. Any deal with compression algorithm is not 134 | possible on this layer (see {!De} for more details). As {!Inf}, and unlike 135 | {!De}, {!Zl} provides a referentially transparent encoder. 136 | 137 | The client must use the {!Def.encoder} given {b by} {!Def.encode} instead a 138 | [encoder] given {b to} {!Def.encode}. *) 139 | 140 | module Def : sig 141 | type src = [ `Channel of in_channel | `String of string | `Manual ] 142 | (** The type for input sources. With a [`Manual] source the client must 143 | provide input with {!src}. With [`String] or [`Channel] source the client 144 | can safely discard [`Await] case (with [assert false]). *) 145 | 146 | type dst = [ `Channel of out_channel | `Buffer of Buffer.t | `Manual ] 147 | (** The type for output destinations. With a [`Manual] destination the client 148 | must provide output storage with {!dst}. With [`Buffer] or [`Channel] 149 | destination the client can safely discard [`Flush] case (with [assert false]). *) 150 | 151 | type encoder 152 | (** The type for ZLIB encoders. *) 153 | 154 | type ret = [ `Await of encoder | `End of encoder | `Flush of encoder ] 155 | 156 | val encoder : 157 | ?dynamic:bool 158 | -> q:De.Queue.t 159 | -> w:De.Lz77.window 160 | -> level:int 161 | -> src 162 | -> dst 163 | -> encoder 164 | (** [encoder ~q ~w ~level src dst] is an encoder that inputs from [src] and 165 | that outputs to [dst]. 166 | 167 | {b Internal queue.} 168 | 169 | [encoder] deals internally with compression algorithm and DEFLATE encoder. 170 | To pass compression values to DEFLATE encoder, we need a queue [q]. Length 171 | of [q] has an impact on performance, and small lengths can be a bottleneck, 172 | leading {!encode} to emit many [`Flush]. We recommend a queue as large as 173 | output buffer. 174 | 175 | {b Window.} 176 | 177 | ZLIB is able to constrain length of window used to do LZ77 compression. 178 | However, small window can slow-down LZ77 compression algorithm. Small 179 | windows are mostly used to enable inflation of output in memory-constrained 180 | environments, for example when compressed data from untrusted sources must 181 | be processed. 182 | 183 | {b Level.} 184 | 185 | Zlib implements 10 levels (from 0 to 9). All of them use the dynamic & 186 | canonic huffman if [dynamic] is [true] (default). Otherwise, we use the 187 | static huffman. The higher the level, the better the ratio. *) 188 | 189 | val src_rem : encoder -> int 190 | (** [src_rem e] is how many bytes it remains in given input buffer. *) 191 | 192 | val dst_rem : encoder -> int 193 | (** [dst_rem e] is how many unused bytes remain in the output buffer of [e]. *) 194 | 195 | val src : encoder -> bigstring -> int -> int -> encoder 196 | (** [src e s j l] provides [e] with [l] bytes to read, starting at [j] in [s]. 197 | This byte range is read by calls to {!encode} with [e] until [`Await] is 198 | returned. To signal the end of input call the function with [l = 0]. 199 | 200 | @raise Invalid_argument when [j] and [l] do not correspond to a valid 201 | range. *) 202 | 203 | val dst : encoder -> bigstring -> int -> int -> encoder 204 | (** [dst e s j l] provides [e] with [l] bytes available to write, starting at 205 | [j] in [s]. This byte range is fill by calls to {!encode} with [e] until 206 | [`Flush] is returned. 207 | 208 | @raise Invalid_argument when [j] and [l] do not correspond to a valid 209 | range. *) 210 | 211 | val encode : encoder -> ret 212 | (** [encode e0] is: 213 | 214 | {ul 215 | {- [`Await e1] if [e0] has a [`Manual] input source and awaits for more 216 | input. The client must use {!src} with [e1] to provide it.} 217 | {- [`Flush e1] if [e0] has a [`Manual] destination and needs more output 218 | storage. The client must drain the buffer before resuming operation.} 219 | {- [`End e1] if [e1] encoded all input. Output buffer is possibly not 220 | empty (it can be check with {!dst_rem}).}} 221 | 222 | {b Limitation.} 223 | 224 | The encoder must manipulate an output buffer of, at least, 2 bytes. If it's 225 | not the case, [encode] does nothing - and it tells you nothing more than it 226 | did nothing. Depending on what you do, a loop can infinitely call [encode] 227 | without any updates until the given output still has less than 2 bytes. 228 | *) 229 | 230 | module Ns : sig 231 | type error = De.Def.Ns.error 232 | (** The type for deflation errors. *) 233 | 234 | val pp_error : Format.formatter -> error -> unit 235 | (** Pretty-printer for {!error}. *) 236 | 237 | val compress_bound : int -> int 238 | (** [compress_bound len] returns a {i clue} about how many bytes we need 239 | to store the result of the deflation of [len] bytes. It's a 240 | pessimistic calculation. *) 241 | 242 | val deflate : 243 | ?level:int -> bigstring -> bigstring -> (int, [> error ]) result 244 | (** [deflate ~level src dst] deflates the content of [src] into [dst]. 245 | 246 | In case of success, it returns the bytes writen in an [Ok] result. In case 247 | of failure, it returns the error in an [Error] result. {!compress_bound} 248 | can be used to {i determine} how many bytes the user needs to allocate 249 | as the destination buffer when he wants to compress [N] bytes. 250 | 251 | Here is an example of how to compress any inputs: 252 | {[ 253 | val input : bigstring 254 | 255 | let len = Zl.Def.Ns.compress_bound (De.bigstring_length input) in 256 | let dst = De.bigstring_create len in 257 | Zl.Def.Ns.deflate ~level:4 input dst 258 | ]} *) 259 | end 260 | end 261 | 262 | module Higher : sig 263 | val compress : 264 | ?level:int 265 | -> ?dynamic:bool 266 | -> w:De.Lz77.window 267 | -> q:De.Queue.t 268 | -> refill:(bigstring -> int) 269 | -> flush:(bigstring -> int -> unit) 270 | -> bigstring 271 | -> bigstring 272 | -> unit 273 | (** [compress ?level ?dynamic ~w ~q ~refill ~flush i o] is [Zlib.compress] 274 | (with [~header:true]) provided by [camlzip] package. 275 | 276 | {ul 277 | {- [w] is the window used by LZ77 compression algorithm.} 278 | {- [q] is shared-queue between compression algorithm and DEFLATE encoder.} 279 | {- [i] is input buffer.} 280 | {- [o] is output buffer.}} 281 | 282 | When [compress] wants more input, it calls [refill] with [i]. The client 283 | returns how many bytes he wrote into [i]. If he returns 0, he signals end 284 | of input. 285 | 286 | When [compress] has written output buffer, it calls [flush] with [o] and 287 | how many bytes it wrote. Bytes into [o] must be {b copied} and they will be 288 | lost at the next call to [flush]. 289 | 290 | A simple example of how to use such interface is: 291 | {[ 292 | let deflate_string ?(level= 4) str = 293 | let i = De.bigstring_create De.io_buffer_size in 294 | let o = De.bigstring_create De.io_buffer_size in 295 | let w = De.Lz77.make_window ~bits:15 in 296 | let q = De.Queue.create 0x1000 in 297 | let r = Buffer.create 0x1000 in 298 | let p = ref 0 in 299 | let refill buf = 300 | let len = min (String.length str - !p) De.io_buffer_size in 301 | Bigstringaf.blit_from_string str ~src_off:!p buf ~dst_off:0 ~len ; 302 | p := !p + len ; len in 303 | let flush buf len = 304 | let str = Bigstringaf.substring buf ~off:0 ~len in 305 | Buffer.add_string r str in 306 | Zl.Higher.compress ~level ~dynamic:true 307 | ~w ~q ~refill ~flush i o ; 308 | Buffer.contents r 309 | ]} 310 | 311 | As {!De.Higher.compress}, several choices was made in this code and 312 | [decompress] don't want to be responsible of them. It's why such function 313 | exists only as example when lengths of buffers (such as [i], [o] or [q]) 314 | changes the speed/compression ratio/memory consumption. 315 | *) 316 | 317 | val uncompress : 318 | allocate:(int -> window) 319 | -> refill:(bigstring -> int) 320 | -> flush:(bigstring -> int -> unit) 321 | -> bigstring 322 | -> bigstring 323 | -> (unit, [> `Msg of string ]) result 324 | (** [uncompress ~allocate ~refill ~flush i o] is [Zlib.uncompress] (with 325 | [~header:true]) provided by [camlzip] package. 326 | 327 | {ul 328 | {- [allocate] is the allocator of window used by LZ77 uncompression algorithm} 329 | {- [i] is input buffer.} 330 | {- [o] is output buffer.}} 331 | 332 | When [uncompress] wants more input, it calls [refill] with [i]. The client 333 | returns how many bytes he wrote into [i]. If he returns 0, he signals end 334 | of input. 335 | 336 | When [uncompress] has written output buffer, it calls [flush] with [o] and 337 | how many bytes it wrote. Bytes into [o] must be {b copied} and they will be 338 | lost at the next call to [flush]. 339 | 340 | A simple example of how to use such interface is: 341 | {[ 342 | let inflate_string str = 343 | let i = De.bigstring_create De.io_buffer_size in 344 | let o = De.bigstring_create De.io_buffer_size in 345 | let allocate bits = De.make_window ~bits in 346 | let r = Buffer.create 0x1000 in 347 | let p = ref 0 in 348 | let refill buf = 349 | let len = min (String.length str - !p) De.io_buffer_size in 350 | Bigstringaf.blit_from_string str ~src_off:!p buf ~dst_off:0 ~len ; 351 | p := !p + len ; len in 352 | let flush buf len = 353 | let str = Bigstringaf.substring buf ~off:0 ~len in 354 | Buffer.add_string r str in 355 | match Zl.Higher.uncompress ~allocate ~refill ~flush i o with 356 | | Ok () -> Ok (Buffer.contents r) 357 | | Error _ as err -> err 358 | ]} 359 | 360 | As you can see, several allocations appear. As long as you want to 361 | uncompress several contents for example, you can re-use the same {i window} 362 | instead of an allocation of one per uncompression. Then, the throughput is 363 | mostly limited by [i] and [o] (even bigger, even faster but it requires 364 | memories). [decompress] don't want to be responsible about these choices, 365 | it's why such function exists only as an example. *) 366 | end 367 | -------------------------------------------------------------------------------- /rfc1951.opam: -------------------------------------------------------------------------------- 1 | opam-version: "2.0" 2 | name: "rfc1951" 3 | maintainer: "Romain Calascibetta " 4 | authors: "Romain Calascibetta " 5 | homepage: "https://github.com/mirage/decompress" 6 | bug-reports: "https://github.com/mirage/decompress/issues" 7 | dev-repo: "git+https://github.com/mirage/decompress.git" 8 | doc: "https://mirage.github.io/decompress/" 9 | license: "MIT" 10 | synopsis: "Implementation of RFC1951 in OCaml" 11 | description: """This package provide an implementation of RFC1951 in OCaml. 12 | 13 | We provide a pure non-blocking interface to inflate and deflate data flow. 14 | """ 15 | 16 | build: [ "dune" "build" "-p" name "-j" jobs ] 17 | run-test: [ "dune" "runtest" "-p" name "-j" jobs ] 18 | 19 | depends: [ 20 | "ocaml" {>= "4.07.0"} 21 | "dune" {>= "2.8"} 22 | "decompress" {= version} 23 | "checkseum" 24 | "optint" 25 | "ctypes" {with-test & >= "0.18.0"} 26 | ] 27 | x-maintenance-intent: [ "(latest)" ] 28 | -------------------------------------------------------------------------------- /test/bin/dune: -------------------------------------------------------------------------------- 1 | (cram 2 | (package decompress) 3 | (deps ../corpus/news ../corpus/bib zpipe.c %{bin:decompress})) 4 | -------------------------------------------------------------------------------- /test/bin/simple.t: -------------------------------------------------------------------------------- 1 | Simple tests 2 | $ echo "Hello World!" | decompress -d > simple.z 3 | $ decompress < simple.z 4 | Hello World! 5 | $ echo "Hello World!" | decompress -d -fzlib > simple.z 6 | $ decompress -fzlib < simple.z 7 | Hello World! 8 | $ cc -o zpipe zpipe.c -lz 9 | $ ./zpipe -d < simple.z 10 | Hello World! 11 | $ ./zpipe < ../corpus/news > news.z 12 | $ decompress -fzlib < news.z > news 13 | $ diff news ../corpus/news 14 | $ decompress -fzlib -d < ../corpus/news > news.z 15 | $ ./zpipe -d < news.z > news 16 | $ diff news ../corpus/news 17 | $ decompress -fgzip -d < ../corpus/news > news.gz 18 | $ decompress -fgzip < news.gz > news 19 | $ diff news ../corpus/news 20 | $ decompress -fzlib -d ../corpus/bib bib.zlib 21 | $ decompress -fzlib bib.zlib bib 22 | $ diff bib ../corpus/bib 23 | $ decompress -fzlib -d --level 0 ../corpus/bib bib.zlib 24 | $ decompress -fzlib bib.zlib bib 25 | $ diff bib ../corpus/bib 26 | $ decompress -fgzip -d --level 0 ../corpus/news news.gz 27 | $ decompress -fgzip news.gz news 28 | $ diff news ../corpus/news 29 | -------------------------------------------------------------------------------- /test/bin/zpipe.c: -------------------------------------------------------------------------------- 1 | /* zpipe.c: example of proper use of zlib's inflate() and deflate() 2 | Not copyrighted -- provided to the public domain 3 | Version 1.4 11 December 2005 Mark Adler */ 4 | 5 | /* Version history: 6 | 1.0 30 Oct 2004 First version 7 | 1.1 8 Nov 2004 Add void casting for unused return values 8 | Use switch statement for inflate() return values 9 | 1.2 9 Nov 2004 Add assertions to document zlib guarantees 10 | 1.3 6 Apr 2005 Remove incorrect assertion in inf() 11 | 1.4 11 Dec 2005 Add hack to avoid MSDOS end-of-line conversions 12 | Avoid some compiler warnings for input and output buffers 13 | */ 14 | 15 | #include 16 | #include 17 | #include 18 | #include "zlib.h" 19 | 20 | #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) 21 | # include 22 | # include 23 | # define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) 24 | #else 25 | # define SET_BINARY_MODE(file) 26 | #endif 27 | 28 | #define CHUNK 16384 29 | 30 | /* Compress from file source to file dest until EOF on source. 31 | def() returns Z_OK on success, Z_MEM_ERROR if memory could not be 32 | allocated for processing, Z_STREAM_ERROR if an invalid compression 33 | level is supplied, Z_VERSION_ERROR if the version of zlib.h and the 34 | version of the library linked do not match, or Z_ERRNO if there is 35 | an error reading or writing the files. */ 36 | int def(FILE *source, FILE *dest, int level) 37 | { 38 | int ret, flush; 39 | unsigned have; 40 | z_stream strm; 41 | unsigned char in[CHUNK]; 42 | unsigned char out[CHUNK]; 43 | 44 | /* allocate deflate state */ 45 | strm.zalloc = Z_NULL; 46 | strm.zfree = Z_NULL; 47 | strm.opaque = Z_NULL; 48 | ret = deflateInit(&strm, level); 49 | if (ret != Z_OK) 50 | return ret; 51 | 52 | /* compress until end of file */ 53 | do { 54 | strm.avail_in = fread(in, 1, CHUNK, source); 55 | if (ferror(source)) { 56 | (void)deflateEnd(&strm); 57 | return Z_ERRNO; 58 | } 59 | flush = feof(source) ? Z_FINISH : Z_NO_FLUSH; 60 | strm.next_in = in; 61 | 62 | /* run deflate() on input until output buffer not full, finish 63 | compression if all of source has been read in */ 64 | do { 65 | strm.avail_out = CHUNK; 66 | strm.next_out = out; 67 | ret = deflate(&strm, flush); /* no bad return value */ 68 | assert(ret != Z_STREAM_ERROR); /* state not clobbered */ 69 | have = CHUNK - strm.avail_out; 70 | if (fwrite(out, 1, have, dest) != have || ferror(dest)) { 71 | (void)deflateEnd(&strm); 72 | return Z_ERRNO; 73 | } 74 | } while (strm.avail_out == 0); 75 | assert(strm.avail_in == 0); /* all input will be used */ 76 | 77 | /* done when last data in file processed */ 78 | } while (flush != Z_FINISH); 79 | assert(ret == Z_STREAM_END); /* stream will be complete */ 80 | 81 | /* clean up and return */ 82 | (void)deflateEnd(&strm); 83 | return Z_OK; 84 | } 85 | 86 | /* Decompress from file source to file dest until stream ends or EOF. 87 | inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be 88 | allocated for processing, Z_DATA_ERROR if the deflate data is 89 | invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and 90 | the version of the library linked do not match, or Z_ERRNO if there 91 | is an error reading or writing the files. */ 92 | int inf(FILE *source, FILE *dest) 93 | { 94 | int ret; 95 | unsigned have; 96 | z_stream strm; 97 | unsigned char in[CHUNK]; 98 | unsigned char out[CHUNK]; 99 | 100 | /* allocate inflate state */ 101 | strm.zalloc = Z_NULL; 102 | strm.zfree = Z_NULL; 103 | strm.opaque = Z_NULL; 104 | strm.avail_in = 0; 105 | strm.next_in = Z_NULL; 106 | ret = inflateInit(&strm); 107 | if (ret != Z_OK) 108 | return ret; 109 | 110 | /* decompress until deflate stream ends or end of file */ 111 | do { 112 | strm.avail_in = fread(in, 1, CHUNK, source); 113 | if (ferror(source)) { 114 | (void)inflateEnd(&strm); 115 | return Z_ERRNO; 116 | } 117 | if (strm.avail_in == 0) 118 | break; 119 | strm.next_in = in; 120 | 121 | /* run inflate() on input until output buffer not full */ 122 | do { 123 | strm.avail_out = CHUNK; 124 | strm.next_out = out; 125 | ret = inflate(&strm, Z_NO_FLUSH); 126 | assert(ret != Z_STREAM_ERROR); /* state not clobbered */ 127 | switch (ret) { 128 | case Z_NEED_DICT: 129 | ret = Z_DATA_ERROR; /* and fall through */ 130 | case Z_DATA_ERROR: 131 | case Z_MEM_ERROR: 132 | (void)inflateEnd(&strm); 133 | return ret; 134 | } 135 | have = CHUNK - strm.avail_out; 136 | if (fwrite(out, 1, have, dest) != have || ferror(dest)) { 137 | (void)inflateEnd(&strm); 138 | return Z_ERRNO; 139 | } 140 | } while (strm.avail_out == 0); 141 | 142 | /* done when inflate() says it's done */ 143 | } while (ret != Z_STREAM_END); 144 | 145 | /* clean up and return */ 146 | (void)inflateEnd(&strm); 147 | return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR; 148 | } 149 | 150 | /* report a zlib or i/o error */ 151 | void zerr(int ret) 152 | { 153 | fputs("zpipe: ", stderr); 154 | switch (ret) { 155 | case Z_ERRNO: 156 | if (ferror(stdin)) 157 | fputs("error reading stdin\n", stderr); 158 | if (ferror(stdout)) 159 | fputs("error writing stdout\n", stderr); 160 | break; 161 | case Z_STREAM_ERROR: 162 | fputs("invalid compression level\n", stderr); 163 | break; 164 | case Z_DATA_ERROR: 165 | fputs("invalid or incomplete deflate data\n", stderr); 166 | break; 167 | case Z_MEM_ERROR: 168 | fputs("out of memory\n", stderr); 169 | break; 170 | case Z_VERSION_ERROR: 171 | fputs("zlib version mismatch!\n", stderr); 172 | } 173 | } 174 | 175 | /* compress or decompress from stdin to stdout */ 176 | int main(int argc, char **argv) 177 | { 178 | int ret; 179 | 180 | /* avoid end-of-line conversions */ 181 | SET_BINARY_MODE(stdin); 182 | SET_BINARY_MODE(stdout); 183 | 184 | /* do compression if no arguments */ 185 | if (argc == 1) { 186 | ret = def(stdin, stdout, 4); 187 | if (ret != Z_OK) 188 | zerr(ret); 189 | return ret; 190 | } 191 | 192 | /* do decompression if -d specified */ 193 | else if (argc == 2 && strcmp(argv[1], "-d") == 0) { 194 | ret = inf(stdin, stdout); 195 | if (ret != Z_OK) 196 | zerr(ret); 197 | return ret; 198 | } 199 | 200 | /* otherwise, report usage */ 201 | else { 202 | fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr); 203 | return 1; 204 | } 205 | } 206 | -------------------------------------------------------------------------------- /test/corpus/geo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mirage/decompress/11691f09e087ba987a9b5384178adcff5ec5c991/test/corpus/geo -------------------------------------------------------------------------------- /test/corpus/obj1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mirage/decompress/11691f09e087ba987a9b5384178adcff5ec5c991/test/corpus/obj1 -------------------------------------------------------------------------------- /test/corpus/obj2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mirage/decompress/11691f09e087ba987a9b5384178adcff5ec5c991/test/corpus/obj2 -------------------------------------------------------------------------------- /test/corpus/pic: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mirage/decompress/11691f09e087ba987a9b5384178adcff5ec5c991/test/corpus/pic -------------------------------------------------------------------------------- /test/dune: -------------------------------------------------------------------------------- 1 | (executable 2 | (name test) 3 | (modules test test_ns) 4 | (libraries fmt base64 camlzip bigstringaf checkseum.c de zl gz lzo alcotest)) 5 | 6 | (executable 7 | (name test_deflate) 8 | (modules test_deflate) 9 | (libraries fmt de bigstringaf alcotest)) 10 | 11 | (executable 12 | (name test_lzo) 13 | (modules test_lzo) 14 | (libraries fmt minilzo lzo bigstringaf alcotest)) 15 | 16 | (rule 17 | (alias runtest) 18 | (package decompress) 19 | (deps 20 | (:test test_deflate.exe) 21 | (source_tree corpus)) 22 | (action 23 | (run %{test} --color=always))) 24 | 25 | (rule 26 | (alias runtest) 27 | (package decompress) 28 | (deps 29 | (:test test.exe) 30 | (source_tree corpus)) 31 | (action 32 | (run %{test} --color=always))) 33 | 34 | (rule 35 | (alias runtest) 36 | (package decompress) 37 | (deps 38 | (:test test_lzo.exe)) 39 | (action 40 | (run %{test} --color=always))) 41 | -------------------------------------------------------------------------------- /test/minilzo-2.10/COPYING: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | 294 | Copyright (C) 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | , 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /test/minilzo-2.10/Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # a very simple Makefile for miniLZO 3 | # 4 | # Copyright (C) 1996-2017 Markus F.X.J. Oberhumer 5 | # 6 | 7 | PROGRAM = testmini 8 | SOURCES = testmini.c minilzo.c 9 | 10 | default: 11 | @echo "" 12 | @echo "Welcome to miniLZO. Please choose one of the following 'make' targets:" 13 | @echo "" 14 | @echo " gcc: gcc" 15 | @echo " unix: hpux hpux9" 16 | @echo " win32: win32-bc win32-cygwin win32-dm win32-lccwin32" 17 | @echo " win32-intelc win32-mingw win32-vc win32-watcomc" 18 | @echo " dos32: dos32-djgpp2 dos32-wc" 19 | @echo "" 20 | 21 | 22 | # Make sure that minilzo.h, lzoconf.h and lzodefs.h are in the 23 | # current dircectory. Otherwise you may want to adjust CPPFLAGS. 24 | CPPFLAGS = -I. -I../include/lzo 25 | 26 | GCC_CFLAGS = -s -Wall -O2 -fomit-frame-pointer 27 | 28 | 29 | # 30 | # gcc (generic) 31 | # 32 | 33 | gcc: 34 | gcc $(CPPFLAGS) $(GCC_CFLAGS) -o $(PROGRAM) $(SOURCES) 35 | 36 | cc: 37 | cc $(CPPFLAGS) -o $(PROGRAM) $(SOURCES) 38 | 39 | 40 | # 41 | # UNIX 42 | # 43 | 44 | hpux: 45 | cc -Ae $(CPPFLAGS) -o $(PROGRAM) $(SOURCES) 46 | 47 | hpux9: 48 | cc -Aa -D_HPUX_SOURCE $(CPPFLAGS) -o $(PROGRAM) $(SOURCES) 49 | 50 | 51 | # 52 | # Windows (32-bit) 53 | # 54 | 55 | win32-borlandc win32-bc: 56 | bcc32 -O2 -d -w -w-aus $(CPPFLAGS) $(SOURCES) 57 | 58 | win32-cygwin32 win32-cygwin: 59 | gcc -mcygwin $(CPPFLAGS) $(GCC_CFLAGS) -o $(PROGRAM).exe $(SOURCES) 60 | 61 | win32-digitalmars win32-dm: 62 | dmc -mn -o -w- $(CPPFLAGS) $(SOURCES) 63 | 64 | win32-intelc win32-ic: 65 | icl -nologo -MT -W3 -O2 -GF $(CPPFLAGS) $(SOURCES) 66 | 67 | win32-lccwin32: 68 | @echo "NOTE: need lcc 2002-07-25 or newer, older versions have bugs" 69 | lc -A -unused -O $(CPPFLAGS) $(SOURCES) 70 | 71 | win32-mingw32 win32-mingw: 72 | gcc -mno-cygwin $(CPPFLAGS) $(GCC_CFLAGS) -o $(PROGRAM).exe $(SOURCES) 73 | 74 | win32-visualc win32-vc: 75 | cl -nologo -MT -W3 -O2 -GF $(CPPFLAGS) $(SOURCES) 76 | 77 | win32-watcomc win32-wc: 78 | wcl386 -bt=nt -zq -mf -5r -zc -w5 -oneatx $(CPPFLAGS) $(SOURCES) 79 | 80 | 81 | # 82 | # DOS (32-bit) 83 | # 84 | 85 | dos32-djgpp2 dos32-dj2: 86 | gcc $(CPPFLAGS) $(GCC_CFLAGS) -o $(PROGRAM).exe $(SOURCES) 87 | 88 | dos32-watcomc dos32-wc: 89 | wcl386 -zq -mf -bt=dos -l=dos4g -5r -ox -zc $(CPPFLAGS) $(SOURCES) 90 | 91 | 92 | # 93 | # other targets 94 | # 95 | 96 | clean: 97 | rm -f $(PROGRAM) $(PROGRAM).exe $(PROGRAM).map $(PROGRAM).tds 98 | rm -f *.err *.o *.obj 99 | 100 | .PHONY: default clean 101 | -------------------------------------------------------------------------------- /test/minilzo-2.10/README.LZO: -------------------------------------------------------------------------------- 1 | 2 | ============================================================================ 3 | miniLZO -- mini subset of the LZO real-time data compression library 4 | ============================================================================ 5 | 6 | Author : Markus Franz Xaver Johannes Oberhumer 7 | 8 | http://www.oberhumer.com/opensource/lzo/ 9 | Version : 2.10 10 | Date : 01 Mar 2017 11 | 12 | I've created miniLZO for projects where it is inconvenient to 13 | include (or require) the full LZO source code just because you 14 | want to add a little bit of data compression to your application. 15 | 16 | miniLZO implements the LZO1X-1 compressor and both the standard and 17 | safe LZO1X decompressor. Apart from fast compression it also useful 18 | for situations where you want to use pre-compressed data files (which 19 | must have been compressed with LZO1X-999). 20 | 21 | miniLZO consists of one C source file and three header files: 22 | minilzo.c 23 | minilzo.h, lzoconf.h, lzodefs.h 24 | 25 | To use miniLZO just copy these files into your source directory, add 26 | minilzo.c to your Makefile and #include minilzo.h from your program. 27 | Note: you also must distribute this file ('README.LZO') with your project. 28 | 29 | minilzo.o compiles to about 6 KiB (using gcc or Visual C on an i386), and 30 | the sources are about 30 KiB when packed with zip - so there's no more 31 | excuse that your application doesn't support data compression :-) 32 | 33 | For more information, documentation, example programs and other support 34 | files (like Makefiles and build scripts) please download the full LZO 35 | package from 36 | http://www.oberhumer.com/opensource/lzo/ 37 | 38 | Have fun, 39 | Markus 40 | 41 | 42 | P.S. minilzo.c is generated automatically from the LZO sources and 43 | therefore functionality is completely identical 44 | 45 | 46 | Appendix A: building miniLZO 47 | ---------------------------- 48 | miniLZO is written such a way that it should compile and run 49 | out-of-the-box on most machines. 50 | 51 | If you are running on a very unusual architecture and lzo_init() fails then 52 | you should first recompile with '-DLZO_DEBUG' to see what causes the failure. 53 | The most probable case is something like 'sizeof(void *) != sizeof(size_t)'. 54 | After identifying the problem you can compile by adding some defines 55 | like '-DSIZEOF_VOID_P=8' to your Makefile. 56 | 57 | The best solution is (of course) using Autoconf - if your project uses 58 | Autoconf anyway just add '-DMINILZO_HAVE_CONFIG_H' to your compiler 59 | flags when compiling minilzo.c. See the LZO distribution for an example 60 | how to set up configure.ac. 61 | 62 | 63 | Appendix B: list of public functions available in miniLZO 64 | --------------------------------------------------------- 65 | Library initialization 66 | lzo_init() 67 | 68 | Compression 69 | lzo1x_1_compress() 70 | 71 | Decompression 72 | lzo1x_decompress() 73 | lzo1x_decompress_safe() 74 | 75 | Checksum functions 76 | lzo_adler32() 77 | 78 | Version functions 79 | lzo_version() 80 | lzo_version_string() 81 | lzo_version_date() 82 | 83 | Portable (but slow) string functions 84 | lzo_memcmp() 85 | lzo_memcpy() 86 | lzo_memmove() 87 | lzo_memset() 88 | 89 | 90 | Appendix C: suggested macros for 'configure.ac' when using Autoconf 91 | ------------------------------------------------------------------- 92 | Checks for typedefs and structures 93 | AC_CHECK_TYPE(ptrdiff_t,long) 94 | AC_TYPE_SIZE_T 95 | AC_CHECK_SIZEOF(short) 96 | AC_CHECK_SIZEOF(int) 97 | AC_CHECK_SIZEOF(long) 98 | AC_CHECK_SIZEOF(long long) 99 | AC_CHECK_SIZEOF(__int64) 100 | AC_CHECK_SIZEOF(void *) 101 | AC_CHECK_SIZEOF(size_t) 102 | AC_CHECK_SIZEOF(ptrdiff_t) 103 | 104 | Checks for compiler characteristics 105 | AC_C_CONST 106 | 107 | Checks for library functions 108 | AC_CHECK_FUNCS(memcmp memcpy memmove memset) 109 | 110 | 111 | Appendix D: Copyright 112 | --------------------- 113 | LZO and miniLZO are Copyright (C) 1996-2017 Markus Franz Xaver Oberhumer 114 | All Rights Reserved. 115 | 116 | LZO and miniLZO are distributed under the terms of the GNU General 117 | Public License (GPL). See the file COPYING. 118 | 119 | Special licenses for commercial and other applications which 120 | are not willing to accept the GNU General Public License 121 | are available by contacting the author. 122 | 123 | 124 | -------------------------------------------------------------------------------- /test/minilzo-2.10/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name minilzo) 3 | (foreign_stubs 4 | (language c) 5 | (names stubs minilzo) 6 | (flags 7 | (:standard -I.)))) 8 | -------------------------------------------------------------------------------- /test/minilzo-2.10/lzoconf.h: -------------------------------------------------------------------------------- 1 | /* lzoconf.h -- configuration of the LZO data compression library 2 | 3 | This file is part of the LZO real-time data compression library. 4 | 5 | Copyright (C) 1996-2017 Markus Franz Xaver Johannes Oberhumer 6 | All Rights Reserved. 7 | 8 | The LZO library is free software; you can redistribute it and/or 9 | modify it under the terms of the GNU General Public License as 10 | published by the Free Software Foundation; either version 2 of 11 | the License, or (at your option) any later version. 12 | 13 | The LZO library is distributed in the hope that it will be useful, 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | GNU General Public License for more details. 17 | 18 | You should have received a copy of the GNU General Public License 19 | along with the LZO library; see the file COPYING. 20 | If not, write to the Free Software Foundation, Inc., 21 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 22 | 23 | Markus F.X.J. Oberhumer 24 | 25 | http://www.oberhumer.com/opensource/lzo/ 26 | */ 27 | 28 | 29 | #ifndef __LZOCONF_H_INCLUDED 30 | #define __LZOCONF_H_INCLUDED 1 31 | 32 | #define LZO_VERSION 0x20a0 /* 2.10 */ 33 | #define LZO_VERSION_STRING "2.10" 34 | #define LZO_VERSION_DATE "Mar 01 2017" 35 | 36 | /* internal Autoconf configuration file - only used when building LZO */ 37 | #if defined(LZO_HAVE_CONFIG_H) 38 | # include 39 | #endif 40 | #include 41 | #include 42 | 43 | 44 | /*********************************************************************** 45 | // LZO requires a conforming 46 | ************************************************************************/ 47 | 48 | #if !defined(CHAR_BIT) || (CHAR_BIT != 8) 49 | # error "invalid CHAR_BIT" 50 | #endif 51 | #if !defined(UCHAR_MAX) || !defined(USHRT_MAX) || !defined(UINT_MAX) || !defined(ULONG_MAX) 52 | # error "check your compiler installation" 53 | #endif 54 | #if (USHRT_MAX < 1) || (UINT_MAX < 1) || (ULONG_MAX < 1) 55 | # error "your limits.h macros are broken" 56 | #endif 57 | 58 | /* get OS and architecture defines */ 59 | #ifndef __LZODEFS_H_INCLUDED 60 | #include 61 | #endif 62 | 63 | 64 | #ifdef __cplusplus 65 | extern "C" { 66 | #endif 67 | 68 | 69 | /*********************************************************************** 70 | // some core defines 71 | ************************************************************************/ 72 | 73 | /* memory checkers */ 74 | #if !defined(__LZO_CHECKER) 75 | # if defined(__BOUNDS_CHECKING_ON) 76 | # define __LZO_CHECKER 1 77 | # elif defined(__CHECKER__) 78 | # define __LZO_CHECKER 1 79 | # elif defined(__INSURE__) 80 | # define __LZO_CHECKER 1 81 | # elif defined(__PURIFY__) 82 | # define __LZO_CHECKER 1 83 | # endif 84 | #endif 85 | 86 | 87 | /*********************************************************************** 88 | // integral and pointer types 89 | ************************************************************************/ 90 | 91 | /* lzo_uint must match size_t */ 92 | #if !defined(LZO_UINT_MAX) 93 | # if (LZO_ABI_LLP64) 94 | # if (LZO_OS_WIN64) 95 | typedef unsigned __int64 lzo_uint; 96 | typedef __int64 lzo_int; 97 | # define LZO_TYPEOF_LZO_INT LZO_TYPEOF___INT64 98 | # else 99 | typedef lzo_ullong_t lzo_uint; 100 | typedef lzo_llong_t lzo_int; 101 | # define LZO_TYPEOF_LZO_INT LZO_TYPEOF_LONG_LONG 102 | # endif 103 | # define LZO_SIZEOF_LZO_INT 8 104 | # define LZO_UINT_MAX 0xffffffffffffffffull 105 | # define LZO_INT_MAX 9223372036854775807LL 106 | # define LZO_INT_MIN (-1LL - LZO_INT_MAX) 107 | # elif (LZO_ABI_IP32L64) /* MIPS R5900 */ 108 | typedef unsigned int lzo_uint; 109 | typedef int lzo_int; 110 | # define LZO_SIZEOF_LZO_INT LZO_SIZEOF_INT 111 | # define LZO_TYPEOF_LZO_INT LZO_TYPEOF_INT 112 | # define LZO_UINT_MAX UINT_MAX 113 | # define LZO_INT_MAX INT_MAX 114 | # define LZO_INT_MIN INT_MIN 115 | # elif (ULONG_MAX >= LZO_0xffffffffL) 116 | typedef unsigned long lzo_uint; 117 | typedef long lzo_int; 118 | # define LZO_SIZEOF_LZO_INT LZO_SIZEOF_LONG 119 | # define LZO_TYPEOF_LZO_INT LZO_TYPEOF_LONG 120 | # define LZO_UINT_MAX ULONG_MAX 121 | # define LZO_INT_MAX LONG_MAX 122 | # define LZO_INT_MIN LONG_MIN 123 | # else 124 | # error "lzo_uint" 125 | # endif 126 | #endif 127 | 128 | /* The larger type of lzo_uint and lzo_uint32_t. */ 129 | #if (LZO_SIZEOF_LZO_INT >= 4) 130 | # define lzo_xint lzo_uint 131 | #else 132 | # define lzo_xint lzo_uint32_t 133 | #endif 134 | 135 | typedef int lzo_bool; 136 | 137 | /* sanity checks */ 138 | LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_int) == LZO_SIZEOF_LZO_INT) 139 | LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_uint) == LZO_SIZEOF_LZO_INT) 140 | LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_xint) >= sizeof(lzo_uint)) 141 | LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_xint) >= sizeof(lzo_uint32_t)) 142 | 143 | #ifndef __LZO_MMODEL 144 | #define __LZO_MMODEL /*empty*/ 145 | #endif 146 | 147 | /* no typedef here because of const-pointer issues */ 148 | #define lzo_bytep unsigned char __LZO_MMODEL * 149 | #define lzo_charp char __LZO_MMODEL * 150 | #define lzo_voidp void __LZO_MMODEL * 151 | #define lzo_shortp short __LZO_MMODEL * 152 | #define lzo_ushortp unsigned short __LZO_MMODEL * 153 | #define lzo_intp lzo_int __LZO_MMODEL * 154 | #define lzo_uintp lzo_uint __LZO_MMODEL * 155 | #define lzo_xintp lzo_xint __LZO_MMODEL * 156 | #define lzo_voidpp lzo_voidp __LZO_MMODEL * 157 | #define lzo_bytepp lzo_bytep __LZO_MMODEL * 158 | 159 | #define lzo_int8_tp lzo_int8_t __LZO_MMODEL * 160 | #define lzo_uint8_tp lzo_uint8_t __LZO_MMODEL * 161 | #define lzo_int16_tp lzo_int16_t __LZO_MMODEL * 162 | #define lzo_uint16_tp lzo_uint16_t __LZO_MMODEL * 163 | #define lzo_int32_tp lzo_int32_t __LZO_MMODEL * 164 | #define lzo_uint32_tp lzo_uint32_t __LZO_MMODEL * 165 | #if defined(lzo_int64_t) 166 | #define lzo_int64_tp lzo_int64_t __LZO_MMODEL * 167 | #define lzo_uint64_tp lzo_uint64_t __LZO_MMODEL * 168 | #endif 169 | 170 | /* Older LZO versions used to support ancient systems and memory models 171 | * such as 16-bit MSDOS with __huge pointers or Cray PVP, but these 172 | * obsolete configurations are not supported any longer. 173 | */ 174 | #if defined(__LZO_MMODEL_HUGE) 175 | #error "__LZO_MMODEL_HUGE memory model is unsupported" 176 | #endif 177 | #if (LZO_MM_PVP) 178 | #error "LZO_MM_PVP memory model is unsupported" 179 | #endif 180 | #if (LZO_SIZEOF_INT < 4) 181 | #error "LZO_SIZEOF_INT < 4 is unsupported" 182 | #endif 183 | #if (__LZO_UINTPTR_T_IS_POINTER) 184 | #error "__LZO_UINTPTR_T_IS_POINTER is unsupported" 185 | #endif 186 | LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(int) >= 4) 187 | LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_uint) >= 4) 188 | /* Strange configurations where sizeof(lzo_uint) != sizeof(size_t) should 189 | * work but have not received much testing lately, so be strict here. 190 | */ 191 | LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_uint) == sizeof(size_t)) 192 | LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_uint) == sizeof(ptrdiff_t)) 193 | LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(lzo_uint) == sizeof(lzo_uintptr_t)) 194 | LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(void *) == sizeof(lzo_uintptr_t)) 195 | LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(char *) == sizeof(lzo_uintptr_t)) 196 | LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(long *) == sizeof(lzo_uintptr_t)) 197 | LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(void *) == sizeof(lzo_voidp)) 198 | LZO_COMPILE_TIME_ASSERT_HEADER(sizeof(char *) == sizeof(lzo_bytep)) 199 | 200 | 201 | /*********************************************************************** 202 | // function types 203 | ************************************************************************/ 204 | 205 | /* name mangling */ 206 | #if !defined(__LZO_EXTERN_C) 207 | # ifdef __cplusplus 208 | # define __LZO_EXTERN_C extern "C" 209 | # else 210 | # define __LZO_EXTERN_C extern 211 | # endif 212 | #endif 213 | 214 | /* calling convention */ 215 | #if !defined(__LZO_CDECL) 216 | # define __LZO_CDECL __lzo_cdecl 217 | #endif 218 | 219 | /* DLL export information */ 220 | #if !defined(__LZO_EXPORT1) 221 | # define __LZO_EXPORT1 /*empty*/ 222 | #endif 223 | #if !defined(__LZO_EXPORT2) 224 | # define __LZO_EXPORT2 /*empty*/ 225 | #endif 226 | 227 | /* __cdecl calling convention for public C and assembly functions */ 228 | #if !defined(LZO_PUBLIC) 229 | # define LZO_PUBLIC(r) __LZO_EXPORT1 r __LZO_EXPORT2 __LZO_CDECL 230 | #endif 231 | #if !defined(LZO_EXTERN) 232 | # define LZO_EXTERN(r) __LZO_EXTERN_C LZO_PUBLIC(r) 233 | #endif 234 | #if !defined(LZO_PRIVATE) 235 | # define LZO_PRIVATE(r) static r __LZO_CDECL 236 | #endif 237 | 238 | /* function types */ 239 | typedef int 240 | (__LZO_CDECL *lzo_compress_t) ( const lzo_bytep src, lzo_uint src_len, 241 | lzo_bytep dst, lzo_uintp dst_len, 242 | lzo_voidp wrkmem ); 243 | 244 | typedef int 245 | (__LZO_CDECL *lzo_decompress_t) ( const lzo_bytep src, lzo_uint src_len, 246 | lzo_bytep dst, lzo_uintp dst_len, 247 | lzo_voidp wrkmem ); 248 | 249 | typedef int 250 | (__LZO_CDECL *lzo_optimize_t) ( lzo_bytep src, lzo_uint src_len, 251 | lzo_bytep dst, lzo_uintp dst_len, 252 | lzo_voidp wrkmem ); 253 | 254 | typedef int 255 | (__LZO_CDECL *lzo_compress_dict_t)(const lzo_bytep src, lzo_uint src_len, 256 | lzo_bytep dst, lzo_uintp dst_len, 257 | lzo_voidp wrkmem, 258 | const lzo_bytep dict, lzo_uint dict_len ); 259 | 260 | typedef int 261 | (__LZO_CDECL *lzo_decompress_dict_t)(const lzo_bytep src, lzo_uint src_len, 262 | lzo_bytep dst, lzo_uintp dst_len, 263 | lzo_voidp wrkmem, 264 | const lzo_bytep dict, lzo_uint dict_len ); 265 | 266 | 267 | /* Callback interface. Currently only the progress indicator ("nprogress") 268 | * is used, but this may change in a future release. */ 269 | 270 | struct lzo_callback_t; 271 | typedef struct lzo_callback_t lzo_callback_t; 272 | #define lzo_callback_p lzo_callback_t __LZO_MMODEL * 273 | 274 | /* malloc & free function types */ 275 | typedef lzo_voidp (__LZO_CDECL *lzo_alloc_func_t) 276 | (lzo_callback_p self, lzo_uint items, lzo_uint size); 277 | typedef void (__LZO_CDECL *lzo_free_func_t) 278 | (lzo_callback_p self, lzo_voidp ptr); 279 | 280 | /* a progress indicator callback function */ 281 | typedef void (__LZO_CDECL *lzo_progress_func_t) 282 | (lzo_callback_p, lzo_uint, lzo_uint, int); 283 | 284 | struct lzo_callback_t 285 | { 286 | /* custom allocators (set to 0 to disable) */ 287 | lzo_alloc_func_t nalloc; /* [not used right now] */ 288 | lzo_free_func_t nfree; /* [not used right now] */ 289 | 290 | /* a progress indicator callback function (set to 0 to disable) */ 291 | lzo_progress_func_t nprogress; 292 | 293 | /* INFO: the first parameter "self" of the nalloc/nfree/nprogress 294 | * callbacks points back to this struct, so you are free to store 295 | * some extra info in the following variables. */ 296 | lzo_voidp user1; 297 | lzo_xint user2; 298 | lzo_xint user3; 299 | }; 300 | 301 | 302 | /*********************************************************************** 303 | // error codes and prototypes 304 | ************************************************************************/ 305 | 306 | /* Error codes for the compression/decompression functions. Negative 307 | * values are errors, positive values will be used for special but 308 | * normal events. 309 | */ 310 | #define LZO_E_OK 0 311 | #define LZO_E_ERROR (-1) 312 | #define LZO_E_OUT_OF_MEMORY (-2) /* [lzo_alloc_func_t failure] */ 313 | #define LZO_E_NOT_COMPRESSIBLE (-3) /* [not used right now] */ 314 | #define LZO_E_INPUT_OVERRUN (-4) 315 | #define LZO_E_OUTPUT_OVERRUN (-5) 316 | #define LZO_E_LOOKBEHIND_OVERRUN (-6) 317 | #define LZO_E_EOF_NOT_FOUND (-7) 318 | #define LZO_E_INPUT_NOT_CONSUMED (-8) 319 | #define LZO_E_NOT_YET_IMPLEMENTED (-9) /* [not used right now] */ 320 | #define LZO_E_INVALID_ARGUMENT (-10) 321 | #define LZO_E_INVALID_ALIGNMENT (-11) /* pointer argument is not properly aligned */ 322 | #define LZO_E_OUTPUT_NOT_CONSUMED (-12) 323 | #define LZO_E_INTERNAL_ERROR (-99) 324 | 325 | 326 | #ifndef lzo_sizeof_dict_t 327 | # define lzo_sizeof_dict_t ((unsigned)sizeof(lzo_bytep)) 328 | #endif 329 | 330 | /* lzo_init() should be the first function you call. 331 | * Check the return code ! 332 | * 333 | * lzo_init() is a macro to allow checking that the library and the 334 | * compiler's view of various types are consistent. 335 | */ 336 | #define lzo_init() __lzo_init_v2(LZO_VERSION,(int)sizeof(short),(int)sizeof(int),\ 337 | (int)sizeof(long),(int)sizeof(lzo_uint32_t),(int)sizeof(lzo_uint),\ 338 | (int)lzo_sizeof_dict_t,(int)sizeof(char *),(int)sizeof(lzo_voidp),\ 339 | (int)sizeof(lzo_callback_t)) 340 | LZO_EXTERN(int) __lzo_init_v2(unsigned,int,int,int,int,int,int,int,int,int); 341 | 342 | /* version functions (useful for shared libraries) */ 343 | LZO_EXTERN(unsigned) lzo_version(void); 344 | LZO_EXTERN(const char *) lzo_version_string(void); 345 | LZO_EXTERN(const char *) lzo_version_date(void); 346 | LZO_EXTERN(const lzo_charp) _lzo_version_string(void); 347 | LZO_EXTERN(const lzo_charp) _lzo_version_date(void); 348 | 349 | /* string functions */ 350 | LZO_EXTERN(int) 351 | lzo_memcmp(const lzo_voidp a, const lzo_voidp b, lzo_uint len); 352 | LZO_EXTERN(lzo_voidp) 353 | lzo_memcpy(lzo_voidp dst, const lzo_voidp src, lzo_uint len); 354 | LZO_EXTERN(lzo_voidp) 355 | lzo_memmove(lzo_voidp dst, const lzo_voidp src, lzo_uint len); 356 | LZO_EXTERN(lzo_voidp) 357 | lzo_memset(lzo_voidp buf, int c, lzo_uint len); 358 | 359 | /* checksum functions */ 360 | LZO_EXTERN(lzo_uint32_t) 361 | lzo_adler32(lzo_uint32_t c, const lzo_bytep buf, lzo_uint len); 362 | LZO_EXTERN(lzo_uint32_t) 363 | lzo_crc32(lzo_uint32_t c, const lzo_bytep buf, lzo_uint len); 364 | LZO_EXTERN(const lzo_uint32_tp) 365 | lzo_get_crc32_table(void); 366 | 367 | /* misc. */ 368 | LZO_EXTERN(int) _lzo_config_check(void); 369 | typedef union { 370 | lzo_voidp a00; lzo_bytep a01; lzo_uint a02; lzo_xint a03; lzo_uintptr_t a04; 371 | void *a05; unsigned char *a06; unsigned long a07; size_t a08; ptrdiff_t a09; 372 | #if defined(lzo_int64_t) 373 | lzo_uint64_t a10; 374 | #endif 375 | } lzo_align_t; 376 | 377 | /* align a char pointer on a boundary that is a multiple of 'size' */ 378 | LZO_EXTERN(unsigned) __lzo_align_gap(const lzo_voidp p, lzo_uint size); 379 | #define LZO_PTR_ALIGN_UP(p,size) \ 380 | ((p) + (lzo_uint) __lzo_align_gap((const lzo_voidp)(p),(lzo_uint)(size))) 381 | 382 | 383 | /*********************************************************************** 384 | // deprecated macros - only for backward compatibility 385 | ************************************************************************/ 386 | 387 | /* deprecated - use 'lzo_bytep' instead of 'lzo_byte *' */ 388 | #define lzo_byte unsigned char 389 | /* deprecated type names */ 390 | #define lzo_int32 lzo_int32_t 391 | #define lzo_uint32 lzo_uint32_t 392 | #define lzo_int32p lzo_int32_t __LZO_MMODEL * 393 | #define lzo_uint32p lzo_uint32_t __LZO_MMODEL * 394 | #define LZO_INT32_MAX LZO_INT32_C(2147483647) 395 | #define LZO_UINT32_MAX LZO_UINT32_C(4294967295) 396 | #if defined(lzo_int64_t) 397 | #define lzo_int64 lzo_int64_t 398 | #define lzo_uint64 lzo_uint64_t 399 | #define lzo_int64p lzo_int64_t __LZO_MMODEL * 400 | #define lzo_uint64p lzo_uint64_t __LZO_MMODEL * 401 | #define LZO_INT64_MAX LZO_INT64_C(9223372036854775807) 402 | #define LZO_UINT64_MAX LZO_UINT64_C(18446744073709551615) 403 | #endif 404 | /* deprecated types */ 405 | typedef union { lzo_bytep a; lzo_uint b; } __lzo_pu_u; 406 | typedef union { lzo_bytep a; lzo_uint32_t b; } __lzo_pu32_u; 407 | /* deprecated defines */ 408 | #if !defined(LZO_SIZEOF_LZO_UINT) 409 | # define LZO_SIZEOF_LZO_UINT LZO_SIZEOF_LZO_INT 410 | #endif 411 | 412 | #if defined(LZO_CFG_COMPAT) 413 | 414 | #define __LZOCONF_H 1 415 | 416 | #if defined(LZO_ARCH_I086) 417 | # define __LZO_i386 1 418 | #elif defined(LZO_ARCH_I386) 419 | # define __LZO_i386 1 420 | #endif 421 | 422 | #if defined(LZO_OS_DOS16) 423 | # define __LZO_DOS 1 424 | # define __LZO_DOS16 1 425 | #elif defined(LZO_OS_DOS32) 426 | # define __LZO_DOS 1 427 | #elif defined(LZO_OS_WIN16) 428 | # define __LZO_WIN 1 429 | # define __LZO_WIN16 1 430 | #elif defined(LZO_OS_WIN32) 431 | # define __LZO_WIN 1 432 | #endif 433 | 434 | #define __LZO_CMODEL /*empty*/ 435 | #define __LZO_DMODEL /*empty*/ 436 | #define __LZO_ENTRY __LZO_CDECL 437 | #define LZO_EXTERN_CDECL LZO_EXTERN 438 | #define LZO_ALIGN LZO_PTR_ALIGN_UP 439 | 440 | #define lzo_compress_asm_t lzo_compress_t 441 | #define lzo_decompress_asm_t lzo_decompress_t 442 | 443 | #endif /* LZO_CFG_COMPAT */ 444 | 445 | 446 | #ifdef __cplusplus 447 | } /* extern "C" */ 448 | #endif 449 | 450 | #endif /* already included */ 451 | 452 | 453 | /* vim:set ts=4 sw=4 et: */ 454 | -------------------------------------------------------------------------------- /test/minilzo-2.10/minilzo.h: -------------------------------------------------------------------------------- 1 | /* minilzo.h -- mini subset of the LZO real-time data compression library 2 | 3 | This file is part of the LZO real-time data compression library. 4 | 5 | Copyright (C) 1996-2017 Markus Franz Xaver Johannes Oberhumer 6 | All Rights Reserved. 7 | 8 | The LZO library is free software; you can redistribute it and/or 9 | modify it under the terms of the GNU General Public License as 10 | published by the Free Software Foundation; either version 2 of 11 | the License, or (at your option) any later version. 12 | 13 | The LZO library is distributed in the hope that it will be useful, 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | GNU General Public License for more details. 17 | 18 | You should have received a copy of the GNU General Public License 19 | along with the LZO library; see the file COPYING. 20 | If not, write to the Free Software Foundation, Inc., 21 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 22 | 23 | Markus F.X.J. Oberhumer 24 | 25 | http://www.oberhumer.com/opensource/lzo/ 26 | */ 27 | 28 | /* 29 | * NOTE: 30 | * the full LZO package can be found at 31 | * http://www.oberhumer.com/opensource/lzo/ 32 | */ 33 | 34 | 35 | #ifndef __MINILZO_H_INCLUDED 36 | #define __MINILZO_H_INCLUDED 1 37 | 38 | #define MINILZO_VERSION 0x20a0 /* 2.10 */ 39 | 40 | #if defined(__LZOCONF_H_INCLUDED) 41 | # error "you cannot use both LZO and miniLZO" 42 | #endif 43 | 44 | /* internal Autoconf configuration file - only used when building miniLZO */ 45 | #ifdef MINILZO_HAVE_CONFIG_H 46 | # include 47 | #endif 48 | #include 49 | #include 50 | 51 | #ifndef __LZODEFS_H_INCLUDED 52 | #include "lzodefs.h" 53 | #endif 54 | #undef LZO_HAVE_CONFIG_H 55 | #include "lzoconf.h" 56 | 57 | #if !defined(LZO_VERSION) || (LZO_VERSION != MINILZO_VERSION) 58 | # error "version mismatch in header files" 59 | #endif 60 | 61 | 62 | #ifdef __cplusplus 63 | extern "C" { 64 | #endif 65 | 66 | 67 | /*********************************************************************** 68 | // 69 | ************************************************************************/ 70 | 71 | /* Memory required for the wrkmem parameter. 72 | * When the required size is 0, you can also pass a NULL pointer. 73 | */ 74 | 75 | #define LZO1X_MEM_COMPRESS LZO1X_1_MEM_COMPRESS 76 | #define LZO1X_1_MEM_COMPRESS ((lzo_uint32_t) (16384L * lzo_sizeof_dict_t)) 77 | #define LZO1X_MEM_DECOMPRESS (0) 78 | 79 | 80 | /* compression */ 81 | LZO_EXTERN(int) 82 | lzo1x_1_compress ( const lzo_bytep src, lzo_uint src_len, 83 | lzo_bytep dst, lzo_uintp dst_len, 84 | lzo_voidp wrkmem ); 85 | 86 | /* decompression */ 87 | LZO_EXTERN(int) 88 | lzo1x_decompress ( const lzo_bytep src, lzo_uint src_len, 89 | lzo_bytep dst, lzo_uintp dst_len, 90 | lzo_voidp wrkmem /* NOT USED */ ); 91 | 92 | /* safe decompression with overrun testing */ 93 | LZO_EXTERN(int) 94 | lzo1x_decompress_safe ( const lzo_bytep src, lzo_uint src_len, 95 | lzo_bytep dst, lzo_uintp dst_len, 96 | lzo_voidp wrkmem /* NOT USED */ ); 97 | 98 | 99 | #ifdef __cplusplus 100 | } /* extern "C" */ 101 | #endif 102 | 103 | #endif /* already included */ 104 | 105 | 106 | /* vim:set ts=4 sw=4 et: */ 107 | -------------------------------------------------------------------------------- /test/minilzo-2.10/minilzo.ml: -------------------------------------------------------------------------------- 1 | exception LZO of string 2 | 3 | let _ = Callback.register_exception "lzo" (LZO "Invalid LZO input/output") 4 | 5 | external compress : 6 | src:string -> src_off:int -> src_len:int -> dst:bytes -> dst_off:int -> int 7 | = "caml_lzo1x_1_compress" 8 | 9 | external uncompress : 10 | src:string -> src_off:int -> src_len:int -> dst:bytes -> dst_off:int -> int 11 | = "caml_lzo1x_decompress" 12 | 13 | let minilzo () = 14 | let str = String.make 256 'a' in 15 | let buf = Bytes.create 65536 in 16 | let len = compress ~src:str ~src_off:0 ~src_len:256 ~dst:buf ~dst_off:0 in 17 | Format.eprintf ">>> len:%d\n%!" len 18 | ; let res = Bytes.sub_string buf 0 len in 19 | let buf = Bytes.create 256 in 20 | let len = uncompress ~src:res ~src_off:0 ~src_len:len ~dst:buf ~dst_off:0 in 21 | assert (str = Bytes.sub_string buf 0 len) 22 | -------------------------------------------------------------------------------- /test/minilzo-2.10/stubs.c: -------------------------------------------------------------------------------- 1 | #include "minilzo.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | CAMLprim value 10 | caml_lzo1x_1_compress(value v_src, value v_src_off, value v_src_len, value v_dst, value v_dst_off) { 11 | CAMLparam5(v_src, v_src_off, v_src_len, v_dst, v_dst_off); 12 | lzo_voidp wrkmem = malloc(LZO1X_1_MEM_COMPRESS); 13 | lzo_bytep src = Bytes_val (v_src) + Int_val (v_src_off); 14 | lzo_bytep dst = Bytes_val (v_dst) + Int_val (v_dst_off); 15 | lzo_uint dst_len = 0; 16 | int ret; 17 | 18 | ret = lzo1x_1_compress(src, Int_val (v_src_len), dst, &dst_len, wrkmem); 19 | free(wrkmem); 20 | 21 | if (ret != LZO_E_OK) 22 | caml_raise_with_string(*caml_named_value("lzo"), "Invalid LZO input"); 23 | 24 | CAMLreturn(Val_int (dst_len)); 25 | } 26 | 27 | CAMLprim value 28 | caml_lzo1x_decompress(value v_src, value v_src_off, value v_src_len, value v_dst, value v_dst_off) { 29 | CAMLparam5(v_src, v_src_off, v_src_len, v_dst, v_dst_off); 30 | lzo_bytep src = Bytes_val (v_src) + Int_val (v_src_off); 31 | lzo_bytep dst = Bytes_val (v_dst) + Int_val (v_dst_off); 32 | lzo_uint dst_len = 0; 33 | int ret; 34 | 35 | ret = lzo1x_decompress(src, Int_val (v_src_len), dst, &dst_len, NULL); 36 | 37 | if (ret != LZO_E_OK) 38 | caml_raise_with_string(*caml_named_value("lzo"), "Invalid LZO output"); 39 | 40 | CAMLreturn(Val_int (dst_len)); 41 | } 42 | -------------------------------------------------------------------------------- /test/minilzo-2.10/testmini.c: -------------------------------------------------------------------------------- 1 | /* testmini.c -- very simple test program for the miniLZO library 2 | 3 | This file is part of the LZO real-time data compression library. 4 | 5 | Copyright (C) 1996-2017 Markus Franz Xaver Johannes Oberhumer 6 | All Rights Reserved. 7 | 8 | The LZO library is free software; you can redistribute it and/or 9 | modify it under the terms of the GNU General Public License as 10 | published by the Free Software Foundation; either version 2 of 11 | the License, or (at your option) any later version. 12 | 13 | The LZO library is distributed in the hope that it will be useful, 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | GNU General Public License for more details. 17 | 18 | You should have received a copy of the GNU General Public License 19 | along with the LZO library; see the file COPYING. 20 | If not, write to the Free Software Foundation, Inc., 21 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 22 | 23 | Markus F.X.J. Oberhumer 24 | 25 | http://www.oberhumer.com/opensource/lzo/ 26 | */ 27 | 28 | 29 | #include 30 | #include 31 | 32 | 33 | /************************************************************************* 34 | // This program shows the basic usage of the LZO library. 35 | // We will compress a block of data and decompress again. 36 | // 37 | // For more information, documentation, example programs and other support 38 | // files (like Makefiles and build scripts) please download the full LZO 39 | // package from 40 | // http://www.oberhumer.com/opensource/lzo/ 41 | **************************************************************************/ 42 | 43 | /* First let's include "minizo.h". */ 44 | 45 | #include "minilzo.h" 46 | 47 | 48 | /* We want to compress the data block at 'in' with length 'IN_LEN' to 49 | * the block at 'out'. Because the input block may be incompressible, 50 | * we must provide a little more output space in case that compression 51 | * is not possible. 52 | */ 53 | 54 | #define IN_LEN (128*1024ul) 55 | #define OUT_LEN (IN_LEN + IN_LEN / 16 + 64 + 3) 56 | 57 | static unsigned char __LZO_MMODEL in [ IN_LEN ]; 58 | static unsigned char __LZO_MMODEL out [ OUT_LEN ]; 59 | 60 | 61 | /* Work-memory needed for compression. Allocate memory in units 62 | * of 'lzo_align_t' (instead of 'char') to make sure it is properly aligned. 63 | */ 64 | 65 | #define HEAP_ALLOC(var,size) \ 66 | lzo_align_t __LZO_MMODEL var [ ((size) + (sizeof(lzo_align_t) - 1)) / sizeof(lzo_align_t) ] 67 | 68 | static HEAP_ALLOC(wrkmem, LZO1X_1_MEM_COMPRESS); 69 | 70 | 71 | /************************************************************************* 72 | // 73 | **************************************************************************/ 74 | 75 | int main(int argc, char *argv[]) 76 | { 77 | int r; 78 | lzo_uint in_len; 79 | lzo_uint out_len; 80 | lzo_uint new_len; 81 | 82 | if (argc < 0 && argv == NULL) /* avoid warning about unused args */ 83 | return 0; 84 | 85 | printf("\nLZO real-time data compression library (v%s, %s).\n", 86 | lzo_version_string(), lzo_version_date()); 87 | printf("Copyright (C) 1996-2017 Markus Franz Xaver Johannes Oberhumer\nAll Rights Reserved.\n\n"); 88 | 89 | 90 | /* 91 | * Step 1: initialize the LZO library 92 | */ 93 | if (lzo_init() != LZO_E_OK) 94 | { 95 | printf("internal error - lzo_init() failed !!!\n"); 96 | printf("(this usually indicates a compiler bug - try recompiling\nwithout optimizations, and enable '-DLZO_DEBUG' for diagnostics)\n"); 97 | return 3; 98 | } 99 | 100 | /* 101 | * Step 2: prepare the input block that will get compressed. 102 | * We just fill it with zeros in this example program, 103 | * but you would use your real-world data here. 104 | */ 105 | in_len = IN_LEN; 106 | lzo_memset(in,0,in_len); 107 | 108 | /* 109 | * Step 3: compress from 'in' to 'out' with LZO1X-1 110 | */ 111 | r = lzo1x_1_compress(in,in_len,out,&out_len,wrkmem); 112 | if (r == LZO_E_OK) 113 | printf("compressed %lu bytes into %lu bytes\n", 114 | (unsigned long) in_len, (unsigned long) out_len); 115 | else 116 | { 117 | /* this should NEVER happen */ 118 | printf("internal error - compression failed: %d\n", r); 119 | return 2; 120 | } 121 | /* check for an incompressible block */ 122 | if (out_len >= in_len) 123 | { 124 | printf("This block contains incompressible data.\n"); 125 | return 0; 126 | } 127 | 128 | /* 129 | * Step 4: decompress again, now going from 'out' to 'in' 130 | */ 131 | new_len = in_len; 132 | r = lzo1x_decompress(out,out_len,in,&new_len,NULL); 133 | if (r == LZO_E_OK && new_len == in_len) 134 | printf("decompressed %lu bytes back into %lu bytes\n", 135 | (unsigned long) out_len, (unsigned long) in_len); 136 | else 137 | { 138 | /* this should NEVER happen */ 139 | printf("internal error - decompression failed: %d\n", r); 140 | return 1; 141 | } 142 | 143 | printf("\nminiLZO simple compression test passed.\n"); 144 | return 0; 145 | } 146 | 147 | 148 | /* vim:set ts=4 sw=4 et: */ 149 | -------------------------------------------------------------------------------- /test/test_deflate.ml: -------------------------------------------------------------------------------- 1 | open De 2 | 3 | let q = Queue.create 0x4000 4 | let o = bigstring_create io_buffer_size 5 | let t = bigstring_create io_buffer_size 6 | let w0 = Lz77.make_window ~bits:15 7 | let w1 = make_window ~bits:15 8 | 9 | let load_file filename = 10 | let ic = open_in filename in 11 | let ln = in_channel_length ic in 12 | let rs = really_input_string ic ln in 13 | close_in ic ; rs 14 | 15 | let compare_files a b = 16 | let x = load_file a and y = load_file b in 17 | Alcotest.(check string) (Fmt.str "%s:%s" a b) x y 18 | 19 | let deflate_with_level ~level filename = 20 | Alcotest.test_case (Fmt.str "%s (level: %d)" filename level) `Quick 21 | @@ fun () -> 22 | let ic = open_in filename in 23 | let oc = open_out (filename ^ ".o") in 24 | let state = Lz77.state ~level ~q ~w:w0 (`Channel ic) in 25 | let encoder = Def.encoder `Manual ~q in 26 | let decoder = Inf.decoder `Manual ~o ~w:w1 in 27 | let rec compress () = 28 | match De.Lz77.compress state with 29 | | `Await -> assert false 30 | | `Flush -> 31 | let literals = Lz77.literals state in 32 | let distances = Lz77.distances state in 33 | Fmt.epr "[compress]: `Flush.\n%!" 34 | ; encode 35 | @@ Def.encode encoder 36 | (`Block 37 | { 38 | Def.kind= 39 | Dynamic (Def.dynamic_of_frequencies ~literals ~distances) 40 | ; last= false 41 | }) 42 | | `End -> 43 | Fmt.epr "[compress]: `End.\n%!" 44 | ; close_in ic 45 | ; encode_rest @@ Def.encode encoder (`Block {Def.kind= Fixed; last= true}) 46 | and encode_rest = function 47 | | (`Partial | `Ok) as res -> 48 | let len = bigstring_length t - Def.dst_rem encoder in 49 | Fmt.epr "[pending]: `Partial (%d byte(s)).\n%!" len 50 | ; Inf.src decoder t 0 len 51 | ; decode_rest (res = `Ok) @@ Inf.decode decoder 52 | | `Block -> assert false 53 | and encode = function 54 | | `Partial -> 55 | let len = bigstring_length t - Def.dst_rem encoder in 56 | Fmt.epr "[encode]: `Partial (%d byte(s)).\n%!" len 57 | ; Inf.src decoder t 0 len 58 | ; decode @@ Inf.decode decoder 59 | | `Ok -> 60 | Fmt.epr "[encode] `Ok.\n%!" 61 | ; compress () 62 | | `Block -> 63 | Fmt.epr "[encode] `Ok.\n%!" 64 | ; compress () 65 | and decode_rest finalize = function 66 | | `Await when finalize -> 67 | Inf.src decoder t 0 0 68 | ; decode_rest finalize @@ Inf.decode decoder 69 | | `Await -> 70 | Def.dst encoder t 0 (bigstring_length t) 71 | ; encode_rest @@ Def.encode encoder `Await 72 | | (`Flush | `End) as state -> 73 | let len = bigstring_length o - Inf.dst_rem decoder in 74 | let str = Bigstringaf.substring o ~off:0 ~len in 75 | output_string oc str 76 | ; if state = `Flush then ( 77 | Inf.flush decoder 78 | ; decode_rest finalize @@ Inf.decode decoder) 79 | else close_out oc 80 | | `Malformed err -> Alcotest.failf "Malformed compressed input: %S" err 81 | and decode = function 82 | | `Await -> 83 | Def.dst encoder t 0 (bigstring_length t) 84 | ; encode @@ Def.encode encoder `Await 85 | | (`Flush | `End) as state -> 86 | let len = bigstring_length o - Inf.dst_rem decoder in 87 | let str = Bigstringaf.substring o ~off:0 ~len in 88 | output_string oc str 89 | ; if state = `Flush then ( 90 | Inf.flush decoder 91 | ; decode @@ Inf.decode decoder) 92 | else close_out oc 93 | | `Malformed err -> Alcotest.failf "Malformed compressed input: %S" err 94 | in 95 | Def.dst encoder t 0 (bigstring_length t) 96 | ; Queue.reset q 97 | ; compress () 98 | ; compare_files filename (filename ^ ".o") 99 | 100 | let corpus = 101 | [ 102 | "corpus/bib"; "corpus/book1"; "corpus/book2"; "corpus/geo"; "corpus/news" 103 | ; "corpus/obj1"; "corpus/obj2"; "corpus/paper1"; "corpus/paper2"; "corpus/pic" 104 | ; "corpus/progc"; "corpus/progl"; "corpus/progp"; "corpus/trans" 105 | ] 106 | 107 | let () = 108 | Alcotest.run "lz" 109 | [ 110 | "0", List.map (deflate_with_level ~level:0) corpus 111 | ; "1", List.map (deflate_with_level ~level:1) corpus 112 | ; "2", List.map (deflate_with_level ~level:2) corpus 113 | ; "3", List.map (deflate_with_level ~level:3) corpus 114 | ; "4", List.map (deflate_with_level ~level:4) corpus 115 | ; "5", List.map (deflate_with_level ~level:5) corpus 116 | ; "6", List.map (deflate_with_level ~level:6) corpus 117 | ; "7", List.map (deflate_with_level ~level:7) corpus 118 | ; "8", List.map (deflate_with_level ~level:8) corpus 119 | ; "9", List.map (deflate_with_level ~level:9) corpus 120 | ] 121 | --------------------------------------------------------------------------------