├── .merlin ├── .gitignore ├── .travis.yml ├── _oasis ├── opam ├── COPYING ├── src ├── test_inflate.ml ├── BS.ml ├── dump_zlib.ml └── inflate.ml └── README.md /.merlin: -------------------------------------------------------------------------------- 1 | S src 2 | B _build/src 3 | PKG bitstring 4 | PKG oUnit 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /_build/ 2 | _tags 3 | META 4 | Makefile 5 | configure 6 | myocamlbuild.ml 7 | *.byte 8 | setup.* 9 | *.mldylib 10 | *.mllib 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | install: wget https://raw.githubusercontent.com/ocaml/ocaml-travisci-skeleton/master/.travis-opam.sh 3 | script: bash -ex .travis-opam.sh 4 | env: 5 | - OCAML_VERSION=4.02 PACKAGE=escalate EXTRA_DEPS="oasis" PRE_INSTALL_HOOK="oasis setup" 6 | -------------------------------------------------------------------------------- /_oasis: -------------------------------------------------------------------------------- 1 | OASISFormat: 0.4 2 | Name: escalate 3 | Version: 0.1.0 4 | Synopsis: Implementation of the Deflate algorithm in pure OCaml 5 | Authors: Marek Kubica 6 | License: ZLIB 7 | Plugins: META (0.4), DevFiles (0.4) 8 | 9 | Library escalate 10 | Path: src 11 | BuildTools: ocamlbuild 12 | BuildDepends: bitstring, bitstring.syntax 13 | Modules: Inflate 14 | 15 | Executable "test-inflate" 16 | Path: src 17 | BuildTools: ocamlbuild 18 | BuildDepends: oUnit, bitstring 19 | MainIs: test_inflate.ml 20 | 21 | Executable "dump-zlib" 22 | Path: src 23 | BuildTools: ocamlbuild 24 | BuildDepends: bitstring 25 | MainIs: dump_zlib.ml 26 | -------------------------------------------------------------------------------- /opam: -------------------------------------------------------------------------------- 1 | opam-version: "1" 2 | maintainer: "marek@xivilization.net" 3 | homepage: "https://github.com/Leonidas-from-XIV/escalate" 4 | license: "ZLIB" 5 | build: [ 6 | # for development. Not to be included in opam-repository 7 | ["oasis" "setup"] 8 | # /development 9 | ["ocaml" "setup.ml" "-configure" "--prefix" prefix] 10 | ["ocaml" "setup.ml" "-build"] 11 | ["ocaml" "setup.ml" "-install"] 12 | ] 13 | remove: [ 14 | ["ocamlfind" "remove" "escalate"] 15 | ] 16 | depends: [ 17 | "ocamlfind" 18 | "bitstring" {>= "2.0.4"} 19 | "ounit" {>= "2.0.0"} 20 | # for development. Not to be included in opam-repository 21 | "oasis" {build & >= "0.4.0"} 22 | # /development 23 | ] 24 | available: [ ocaml-version >= "4.02.0" ] 25 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010, 2015 Marek Kubica 2 | 3 | This software is provided 'as-is', without any express or implied 4 | warranty. In no event will the authors be held liable for any damages 5 | arising from the use of this software. 6 | 7 | Permission is granted to anyone to use this software for any purpose, 8 | including commercial applications, and to alter it and redistribute it 9 | freely, subject to the following restrictions: 10 | 11 | 1. The origin of this software must not be misrepresented; you must not 12 | claim that you wrote the original software. If you use this software 13 | in a product, an acknowledgment in the product documentation would be 14 | appreciated but is not required. 15 | 16 | 2. Altered source versions must be plainly marked as such, and must not be 17 | misrepresented as being the original software. 18 | 19 | 3. This notice may not be removed or altered from any source 20 | distribution. 21 | -------------------------------------------------------------------------------- /src/test_inflate.ml: -------------------------------------------------------------------------------- 1 | open OUnit2 2 | open Inflate 3 | 4 | let zlib_compressed = "x\x9cs\xcaI\xccPH\xc2$\x14\x01o\x19\x08u" 5 | let plaintext = "Blah blah blah blah blah!" 6 | 7 | let read_zlib_header expected function_got _ = 8 | let header, _, _ = parse_zlib zlib_compressed in 9 | let assert_header = assert_equal in 10 | assert_header expected (function_got header) 11 | 12 | let inflate_blah _ = 13 | let printer = Some (fun x -> x) in 14 | assert_equal ?printer plaintext (inflate zlib_compressed) 15 | 16 | let suite = "Inflate" >::: [ 17 | "inflate_blah" >:: inflate_blah; 18 | "read_zlib_header_cm" >:: read_zlib_header Deflate (fun h -> h.compression_method); 19 | "read_zlib_header_cinfo" >:: read_zlib_header 32768 (fun h -> h.window_size); 20 | "read_zlib_header_flevel" >:: read_zlib_header Default (fun h -> h.compression_level); 21 | "read_zlib_header_fcheck" >:: read_zlib_header 28 (fun h -> h.fcheck); 22 | "read_zlib_header_checksum" >:: read_zlib_header 30876 (fun h -> 23 | h.checksum) 24 | ] 25 | 26 | let _ = run_test_tt_main suite 27 | -------------------------------------------------------------------------------- /src/BS.ml: -------------------------------------------------------------------------------- 1 | (** 2 | * Thin wrapper around the Bitstring module. 3 | * Copyright (c) 2015 Marek Kubica 4 | * 5 | * This software is provided 'as-is', without any express or implied 6 | * warranty. In no event will the authors be held liable for any damages 7 | * arising from the use of this software. 8 | * 9 | * Permission is granted to anyone to use this software for any purpose, 10 | * including commercial applications, and to alter it and redistribute it 11 | * freely, subject to the following restrictions: 12 | * 13 | * 1. The origin of this software must not be misrepresented; you must not 14 | * claim that you wrote the original software. If you use this software 15 | * in a product, an acknowledgment in the product documentation would be 16 | * appreciated but is not required. 17 | * 18 | * 2. Altered source versions must be plainly marked as such, and must not be 19 | * misrepresented as being the original software. 20 | * 21 | * 3. This notice may not be removed or altered from any source 22 | * distribution. 23 | *) 24 | 25 | (* make the naming a tad less cumbersome *) 26 | include Bitstring 27 | let of_string = bitstring_of_string 28 | let to_string = string_of_bitstring 29 | let take = takebits 30 | let drop = dropbits 31 | let hexdump = hexdump_bitstring 32 | -------------------------------------------------------------------------------- /src/dump_zlib.ml: -------------------------------------------------------------------------------- 1 | open Inflate 2 | 3 | let compression_level = function 4 | | Fastest -> "Fastest" 5 | | Fast -> "Fast" 6 | | Default -> "Default" 7 | | Maximum -> "Maximum" 8 | 9 | let final = function 10 | | Last -> " (final)" 11 | | Continues -> "" 12 | 13 | let encoding = function 14 | | FixedHuffman _ -> "fixed Huffman" 15 | | DynamicHuffman -> "dynamic Huffman" 16 | | Uncompressed _ -> "uncompressed" 17 | 18 | let pp_payload = function 19 | | Literal x -> Printf.printf "Literal '%c'\n" x; 20 | | Repeat (len, dist) -> Printf.printf "Repeat \n" 21 | len dist 22 | 23 | let pp_segments = function 24 | (continues, blocks) -> 25 | Printf.printf "New block%s in %s encoding:\n" (final continues) 26 | (encoding blocks); 27 | match blocks with 28 | | FixedHuffman payload -> List.iter pp_payload payload 29 | | DynamicHuffman -> Printf.printf "Dynamic Huffman TODO\n" 30 | | Uncompressed bytes -> Printf.printf "Uncompressed %d bytes\n" @@ 31 | String.length bytes 32 | 33 | let () = 34 | let file_name = Array.get Sys.argv 1 in 35 | let chan = open_in_bin file_name in 36 | let n = in_channel_length chan in 37 | let s = Bytes.create n in 38 | really_input chan s 0 n; 39 | close_in chan; 40 | let header, segments, adler32 = Inflate.parse_zlib s in 41 | Printf.printf "Compression level: %s\n" @@ compression_level 42 | header.compression_level; 43 | Printf.printf "Window size: %d\n" header.window_size; 44 | Printf.printf "Dict: %B\n" header.fdict; 45 | List.iter pp_segments segments; 46 | Printf.printf "ADLER32: %lu\n" adler32; 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Escalate 2 | ======== 3 | 4 | Pure OCaml data compression. 5 | 6 | [![Build Status](https://travis-ci.org/Leonidas-from-XIV/escalate.svg?branch=master)](https://travis-ci.org/Leonidas-from-XIV/escalate) 7 | 8 | Rationale 9 | --------- 10 | 11 | `zlib` is a de-facto key library of the internet, as the DEFLATE compression 12 | and its container formats ZLIB and GZIP are ubiquitous. This library tries to 13 | supply a type-safe, memory safe alternative to `zlib`. 14 | 15 | 16 | Format support 17 | -------------- 18 | 19 | Parsing deflated files is currently in the works. 20 | 21 | | Inflate support | partly | 22 | |-----------------|--------| 23 | | Uncompressed | yes | 24 | | Fixed Huffman | yes | 25 | | Dynamic Huffman | no | 26 | 27 | Compressing is not yet implemented. 28 | 29 | | Deflate support | no | 30 | |-----------------|--------| 31 | | Uncompressed | no | 32 | | Fixed Huffman | no | 33 | | Dynamic Huffman | no | 34 | 35 | DEFLATE streams can be part of a number of containers. The most common ones are 36 | planned to be implemented. 37 | 38 | | Container support | partly | 39 | |-------------------|--------| 40 | | ZLIB | yes | 41 | | GZIP | no | 42 | | ZIP | no | 43 | 44 | 45 | Installation 46 | ------------ 47 | 48 | You want to try it out? Cool, here are the the build instructions. At best you 49 | get your OCaml via [OPAM](https://opam.ocaml.org/). Eventually, once this 50 | library matures, it will also be available from OPAM directly! 51 | 52 | If you want to build the code from the Git repository, you should follow my 53 | footsteps and use these commands: 54 | 55 | ```sh 56 | opam install oasis bitstring ounit 57 | oasis setup 58 | ocaml setup.ml -configure 59 | ocaml setup.ml -build 60 | ``` 61 | 62 | This will get you the compiled binaries. 63 | 64 | 65 | License 66 | ------- 67 | 68 | The license of `escalate` is the same as of `zlib`, the so-called zlib license, 69 | for convenience reasons. If you can use `zlib` in your program, you can use 70 | `escalate`. 71 | -------------------------------------------------------------------------------- /src/inflate.ml: -------------------------------------------------------------------------------- 1 | (** 2 | * decompress DEFLATE streams 3 | * Copyright (c) 2010, 2015 Marek Kubica 4 | * 5 | * This software is provided 'as-is', without any express or implied 6 | * warranty. In no event will the authors be held liable for any damages 7 | * arising from the use of this software. 8 | * 9 | * Permission is granted to anyone to use this software for any purpose, 10 | * including commercial applications, and to alter it and redistribute it 11 | * freely, subject to the following restrictions: 12 | * 13 | * 1. The origin of this software must not be misrepresented; you must not 14 | * claim that you wrote the original software. If you use this software 15 | * in a product, an acknowledgment in the product documentation would be 16 | * appreciated but is not required. 17 | * 18 | * 2. Altered source versions must be plainly marked as such, and must not be 19 | * misrepresented as being the original software. 20 | * 21 | * 3. This notice may not be removed or altered from any source 22 | * distribution. 23 | *) 24 | 25 | (* Huffman *) 26 | type distance = int 27 | type length = int 28 | type huffman_elements = Literal of char | Repeat of length * distance 29 | 30 | (* DEFLATE *) 31 | type blocks = 32 | | Uncompressed of string 33 | | FixedHuffman of huffman_elements list 34 | | DynamicHuffman 35 | type block_final = Continues | Last 36 | 37 | (* ZLIB *) 38 | type compression_method = Deflate 39 | type window_size = int 40 | type dict_present = bool 41 | type compression_level = Fastest | Fast | Default | Maximum 42 | 43 | type zlib_header = { 44 | compression_method : compression_method; 45 | window_size : int; 46 | compression_level : compression_level; 47 | fdict : bool; 48 | fcheck : int; 49 | checksum : int 50 | } 51 | 52 | (* https://stackoverflow.com/questions/2602823/ *) 53 | let tab = [|0x0; 0x8; 0x4; 0xc; 0x2; 0xa; 0x6; 0xe; 54 | 0x1; 0x9; 0x5; 0xd; 0x3; 0xb; 0x7; 0xf|] 55 | 56 | let reverse_bits byte = 57 | let lookup = Array.get tab in 58 | ((lookup (byte land 0xf)) lsl 4) lor lookup (byte lsr 4) 59 | 60 | let reverse_string_bits s = 61 | String.map (fun x -> Char.chr @@ reverse_bits @@ Char.code x) s 62 | 63 | (** Skips to next complete byte. Does nothing if it is aligned to a byte. *) 64 | let align_to_next_byte bits = 65 | let _, offset, _ = bits in 66 | let byte_offset = offset mod 8 in 67 | if byte_offset = 0 then bits else 68 | BS.drop (8 - byte_offset) bits 69 | 70 | let compression_level = function 71 | | 0 -> Fastest 72 | | 1 -> Fast 73 | | 2 -> Default 74 | | 3 -> Maximum 75 | | _ -> failwith "Invalid compression level" 76 | 77 | let parse_zlib_header bits = 78 | bitmatch bits with 79 | | { 80 | cinfo : 4; 81 | cm : 4; 82 | flevel : 2; 83 | fdict : 1; 84 | fcheck : 5 85 | } -> bitmatch bits with 86 | | {checksum : 16} -> 87 | if cm <> 8 then failwith "Invalid compression method" else 88 | { 89 | compression_method = Deflate; 90 | window_size = int_of_float @@ 2. ** (float_of_int cinfo +. 8.); 91 | compression_level = compression_level flevel; 92 | fdict = fdict; 93 | fcheck = fcheck; 94 | checksum = checksum 95 | } 96 | 97 | let final_block = function 98 | | true -> Last 99 | | _ -> Continues 100 | 101 | let length_code = function 102 | | 257 -> (0, 3) 103 | | 258 -> (0, 4) 104 | | 259 -> (0, 5) 105 | | 260 -> (0, 6) 106 | | 261 -> (0, 7) 107 | | 262 -> (0, 8) 108 | | 263 -> (0, 9) 109 | | 264 -> (0, 10) 110 | | 265 -> (1, 11) 111 | | 266 -> (1, 13) 112 | | 267 -> (1, 15) 113 | | 268 -> (1, 17) 114 | | 269 -> (2, 19) 115 | | 270 -> (2, 23) 116 | | 271 -> (2, 27) 117 | | 272 -> (2, 31) 118 | | 273 -> (3, 35) 119 | | 274 -> (3, 43) 120 | | 275 -> (3, 51) 121 | | 276 -> (3, 59) 122 | | 277 -> (4, 67) 123 | | 278 -> (4, 83) 124 | | 279 -> (4, 99) 125 | | 280 -> (4, 115) 126 | | 281 -> (5, 131) 127 | | 282 -> (5, 163) 128 | | 283 -> (5, 195) 129 | | 284 -> (5, 227) 130 | | 285 -> (0, 258) 131 | | _ -> failwith "Invalid length code" 132 | 133 | let distance_code = function 134 | | 0 -> (0, 1) 135 | | 1 -> (0, 2) 136 | | 2 -> (0, 3) 137 | | 3 -> (0, 4) 138 | | 4 -> (1, 5) 139 | | 5 -> (1, 7) 140 | | 6 -> (2, 9) 141 | | 7 -> (2, 13) 142 | | 8 -> (3, 17) 143 | | 9 -> (3, 25) 144 | | 10 -> (4, 33) 145 | | 11 -> (4, 49) 146 | | 12 -> (5, 65) 147 | | 13 -> (5, 97) 148 | | 14 -> (6, 129) 149 | | 15 -> (6, 193) 150 | | 16 -> (7, 257) 151 | | 17 -> (7, 385) 152 | | 18 -> (8, 513) 153 | | 19 -> (8, 769) 154 | | 20 -> (9, 1025) 155 | | 21 -> (9, 1537) 156 | | 22 -> (10, 2049) 157 | | 23 -> (10, 3073) 158 | | 24 -> (11, 4097) 159 | | 25 -> (11, 6145) 160 | | 26 -> (12, 8193) 161 | | 27 -> (12, 12289) 162 | | 28 -> (13, 16385) 163 | | 29 -> (13, 24577) 164 | | _ -> failwith "Invalid distance code" 165 | 166 | let read_reversed width num = 167 | let rec reconstruct shifts num acc = 168 | match shifts with 169 | | 0 -> acc 170 | | n -> let bit = num land 0x1 in 171 | let acc = (bit lsl (n-1)) lor acc in 172 | (* Printf.printf "Bit %d Shift %d Res %d\n" bit n acc; *) 173 | reconstruct (n-1) (num lsr 1) acc 174 | in 175 | reconstruct width num 0 176 | 177 | let rec decode_huffman bits = 178 | bitmatch bits with 179 | (* Literal 256, termination *) 180 | | { element : 7; 181 | rest : -1 : bitstring } when element = 0 -> 182 | ([], rest) 183 | (* Literal 257 - 279, distance code *) 184 | | { element : 7; 185 | rest : -1 : bitstring } when element > 0 && element <= 23 -> 186 | (* Printf.printf "Element %d\n" element; *) 187 | let extra_bits, length_start = length_code @@ element + 256 in 188 | (* Printf.printf "Extra bits %d, offset start %d\n" extra_bits length_start; *) 189 | 190 | let length, rest = bitmatch rest with 191 | | { length : extra_bits; 192 | rest : -1 : bitstring } -> 193 | let length = read_reversed extra_bits (Int64.to_int length) in 194 | (length + length_start, rest) 195 | in 196 | 197 | let distance, rest = bitmatch rest with 198 | | { distance : 5; 199 | rest : -1 : bitstring } -> 200 | (* Printf.printf "Distance code %d\n" distance; *) 201 | let extra_bits, distance_start = distance_code distance in 202 | bitmatch rest with 203 | | { distance : extra_bits; 204 | rest : -1 : bitstring } -> 205 | let distance = read_reversed extra_bits (Int64.to_int distance) in 206 | (distance + distance_start, rest) 207 | in 208 | (* Printf.printf "Distance %d\n" distance; *) 209 | 210 | let decoded, rest = decode_huffman rest in 211 | (Repeat (length, distance)::decoded, rest) 212 | (* Literal 0 - 143 *) 213 | | { element : 8; 214 | rest : -1 : bitstring } when element >= 48 && element <= 191 -> 215 | let decoded, rest = decode_huffman rest in 216 | (Literal (Char.chr @@ element - 0x30)::decoded, rest) 217 | (* Literal 280 - 287 *) 218 | | { element : 8 } when element >= 192 && element <= 199 -> 219 | failwith "Length code" 220 | (* Literal 144 - 255 *) 221 | | { element : 9; 222 | rest : -1 : bitstring } when element >= 400 && element <= 511 -> 223 | let decoded, rest = decode_huffman rest in 224 | (Literal (Char.chr @@ element - 0x190)::decoded, rest) 225 | | { _ } -> failwith "Invalid huffman segment" 226 | 227 | let parse_segment bitstring = 228 | bitmatch bitstring with 229 | | { bfinal : 1; 230 | (* Non-compressed block, BTYPE=00 *) 231 | 0 : 2; 232 | rest : -1 : bitstring } -> 233 | let rest = rest 234 | |> align_to_next_byte 235 | |> BS.to_string 236 | |> reverse_string_bits 237 | |> BS.of_string 238 | in 239 | (bitmatch rest with 240 | | { len : 16 : littleendian; 241 | nlen : 16 : littleendian; 242 | rest : -1 : bitstring } -> 243 | bitmatch rest with 244 | | { bytes : 8*len : string; 245 | rest : -1 : string } -> 246 | let rest = rest |> reverse_string_bits |> BS.of_string in 247 | ((final_block bfinal, Uncompressed bytes), rest)) 248 | | { bfinal : 1; 249 | (* fixed Huffman, BTYPE=0b01, 0b10=2 in reversed *) 250 | 2 : 2; 251 | rest : -1 : bitstring } -> 252 | let decoded, rest = decode_huffman rest in 253 | ((final_block bfinal, FixedHuffman decoded), rest) 254 | | { bfinal : 1; 255 | (* dynamic Huffman, BTYPE=10, 0b01=1 in reversed *) 256 | 1 : 2; 257 | _ : -1 : bitstring } -> 258 | (* ((final_block bfinal, DynamicHuffman, []), rest) *) 259 | failwith "Dynamic Huffman, TODO" 260 | | { bfinal : 1; 261 | (* reserved, BTYPE=11, fail *) 262 | 3 : 2} -> 263 | failwith "Reserved block, invalid bitstream" 264 | 265 | let parse_adler32 bits = 266 | let bits = bits 267 | |> align_to_next_byte 268 | |> BS.take 32 269 | |> BS.to_string 270 | |> reverse_string_bits 271 | |> BS.of_string 272 | in 273 | bitmatch bits with 274 | | { adler32 : 32 } -> adler32 275 | 276 | let rec parse_payload bits = 277 | let seg, rest = parse_segment bits in 278 | match seg with 279 | | Last, _ -> ([seg], rest) 280 | | Continues, _ -> 281 | let (segs, rest) = parse_payload rest in 282 | (seg :: segs, rest) 283 | 284 | let parse_zlib bytestring = 285 | let bits = BS.of_string bytestring in 286 | bitmatch bits with 287 | | { header: 16: bitstring; blocks: -1: string } -> 288 | let payload, rest = blocks 289 | |> reverse_string_bits 290 | |> BS.of_string 291 | |> parse_payload in 292 | (parse_zlib_header header, payload, parse_adler32 rest) 293 | 294 | let adler32 data = 295 | let (a, b) = List.fold_left (fun (a, b) d -> 296 | (* these are guaranteed to be 16 bit which fits 31 bit OCaml ints *) 297 | let new_a = (a + d) mod 65521 in 298 | let new_b = (b + new_a) mod 65521 in 299 | (new_a, new_b)) 300 | (1, 0) data in 301 | let a32 = Int32.of_int a 302 | and b32 = Int32.of_int b 303 | in Int32.logor a32 @@ Int32.shift_left b32 16 304 | 305 | let inflate data = 306 | let bits = BS.of_string data in 307 | ignore @@ parse_payload bits; 308 | "Foo" 309 | --------------------------------------------------------------------------------