├── .gitattributes ├── .gitignore ├── .travis.yml ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── benches └── bench.rs ├── changelog.md ├── deflate-fuzz-target ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── in │ ├── dump.bin │ ├── issue_18_201911.bin │ └── short.bin └── src │ └── main.rs ├── src ├── bit_reverse.rs ├── bitstream.rs ├── chained_hash_table.rs ├── checksum.rs ├── compress.rs ├── compression_options.rs ├── deflate_state.rs ├── encoder_state.rs ├── huffman_lengths.rs ├── huffman_table.rs ├── input_buffer.rs ├── length_encode.rs ├── lib.rs ├── lz77.rs ├── lzvalue.rs ├── matching.rs ├── output_writer.rs ├── rle.rs ├── stored_block.rs ├── test_utils.rs ├── writer.rs └── zlib.rs └── tests ├── afl └── default │ ├── id_000000,sig_06,src_000831,op_havoc,rep_64 │ ├── id_000001,sig_06,src_000891,op_havoc,rep_8 │ ├── id_000002,sig_06,src_000891,op_havoc,rep_8 │ ├── id_000003,sig_06,src_000891,op_havoc,rep_16 │ ├── id_000005,sig_06,src_000995,op_flip1,pos_65535 │ ├── id_000006,sig_06,src_000995,op_ext_AO,pos_65534 │ ├── id_000007,sig_06,src_001004,op_flip1,pos_15 │ ├── id_000008,sig_06,src_001004,op_flip1,pos_15 │ ├── id_000009,sig_06,src_001004,op_flip1,pos_238 │ ├── id_000010,sig_06,src_001004,op_flip1,pos_554 │ ├── id_000011,sig_06,src_001004,op_flip1,pos_1954 │ ├── id_000012,sig_06,src_001004,op_flip1,pos_1955 │ ├── id_000013,sig_06,src_001004,op_flip1,pos_1958 │ ├── id_000014,sig_06,src_001004,op_flip1,pos_1963 │ ├── id_000015,sig_06,src_001004,op_flip1,pos_6662 │ ├── id_000016,sig_06,src_001004,op_flip1,pos_15144 │ ├── id_000017,sig_06,src_001004,op_flip1,pos_15321 │ ├── id_000018,sig_06,src_001004,op_flip1,pos_16334 │ ├── id_000019,sig_06,src_001004,op_flip1,pos_17475 │ ├── id_000020,sig_06,src_001004,op_flip1,pos_18334 │ ├── id_000021,sig_06,src_001004,op_flip1,pos_20365 │ ├── id_000022,sig_06,src_001004,op_flip1,pos_20500 │ ├── id_000023,sig_06,src_001004,op_flip1,pos_20513 │ ├── id_000024,sig_06,src_001004,op_flip1,pos_20518 │ ├── 
id_000025,sig_06,src_001004,op_flip1,pos_20521 │ ├── id_000026,sig_06,src_001004,op_flip1,pos_20522 │ ├── id_000027,sig_06,src_001004,op_flip1,pos_20525 │ ├── id_000028,sig_06,src_001004,op_flip1,pos_20527 │ ├── id_000029,sig_06,src_001004,op_flip1,pos_20550 │ ├── id_000030,sig_06,src_001004,op_flip1,pos_25139 │ ├── id_000031,sig_06,src_001004,op_flip1,pos_25204 │ ├── id_000032,sig_06,src_001004,op_flip1,pos_32259 │ ├── id_000033,sig_06,src_001004,op_flip1,pos_44443 │ ├── id_000034,sig_06,src_001004,op_flip1,pos_50703 │ ├── id_000035,sig_06,src_001004,op_flip1,pos_57865 │ ├── id_000036,sig_06,src_001004,op_flip1,pos_63260 │ ├── id_000037,sig_06,src_001004,op_flip2,pos_40352 │ ├── id_000038,sig_06,src_001004,op_arith8,pos_15330,val_-31 │ ├── id_000039,sig_06,src_001004,op_arith8,pos_32239,val_-30 │ ├── id_000040,sig_06,src_001004,op_int32,pos_10338,val_be_+512 │ ├── id_000041,sig_06,src_001004,op_int32,pos_20438,val_be_+1 │ ├── id_000042,sig_06,src_001004,op_int32,pos_23295,val_+100 │ ├── id_000043,sig_06,src_001004,op_int32,pos_25214,val_+16 │ ├── id_000044,sig_06,src_001004,op_ext_AO,pos_55585 │ └── id_000045,sig_06,src_001006,op_havoc,rep_8 ├── issue_18_201911.bin ├── issue_44.zlib ├── pg11.txt ├── short.bin └── test.rs /.gitattributes: -------------------------------------------------------------------------------- 1 | tests/*.txt binary 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled files 2 | *.o 3 | *.so 4 | *.rlib 5 | *.dll 6 | 7 | # Executables 8 | *.exe 9 | 10 | # Generated by Cargo 11 | /target/ 12 | 13 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 14 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 15 | Cargo.lock 16 | 17 | /data/ 18 | -------------------------------------------------------------------------------- 
/.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | rust: 3 | - stable 4 | - beta 5 | - nightly 6 | - 1.32.0 7 | os: 8 | - linux 9 | - osx 10 | - windows 11 | sudo: false 12 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "deflate" 3 | version = "1.0.0" 4 | edition = "2018" 5 | license = "MIT/Apache-2.0" 6 | authors = ["oyvindln "] 7 | readme = "README.md" 8 | keywords = ["flate", "deflate", "zlib", "compression", "gzip"] 9 | repository = "https://github.com/image-rs/deflate-rs" 10 | homepage = "https://github.com/image-rs/deflate-rs" 11 | documentation = "https://docs.rs/deflate/" 12 | description = """ 13 | A DEFLATE, zlib and gzip encoder written in Rust. 14 | """ 15 | categories = ["compression"] 16 | 17 | exclude = [ 18 | "tests/*", 19 | "fuzz/*" 20 | ] 21 | 22 | [dependencies] 23 | adler32 = "1.2.0" 24 | gzip-header = { version = "1.0", optional = true } 25 | 26 | [dev-dependencies] 27 | miniz_oxide = "0.5.0" 28 | 29 | [features] 30 | benchmarks = [] 31 | gzip = ["gzip-header"] 32 | 33 | [package.metadata.docs.rs] 34 | features = ["gzip"] 35 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # deflate-rs 2 | 3 | [![Crates.io](https://img.shields.io/crates/v/deflate.svg)](https://crates.io/crates/deflate)[![Docs](https://docs.rs/deflate/badge.svg)](https://docs.rs/deflate) 4 | 5 | 6 | An implementation of a [DEFLATE](http://www.gzip.org/zlib/rfc-deflate.html) encoder in pure Rust. Not a direct port, but does take some inspiration from [zlib](https://www.zlib.net/), [miniz](https://github.com/richgel999/miniz) and [zopfli](https://github.com/google/zopfli). The API is based on the one in the [flate2](https://crates.io/crates/flate2) crate that contains bindings, zlib miniz_oxide, and miniz. 7 | 8 | Deflate encoding with and without zlib and gzip metadata (zlib dictionaries are not supported) is supported. No unsafe code is used. 9 | 10 | Encoding in gzip format requires enabling the 'gzip' feature. 11 | 12 | This library is now mostly in maintenance mode, focus being on the Rust backend of [flate2](https://crates.io/crates/flate2) instead. 13 | 14 | The minimum required Rust version is 1.32.0 due to use of library functions for endinaness conversion (unit tests requires a newer version). 
15 | 16 | # Usage: 17 | ## Simple compression function: 18 | ``` rust 19 | use deflate::deflate_bytes; 20 | 21 | let data = b"Some data"; 22 | let compressed = deflate_bytes(&data); 23 | ``` 24 | 25 | ## Using a writer: 26 | 27 | ``` rust 28 | use std::io::Write; 29 | 30 | use deflate::Compression; 31 | use deflate::write::ZlibEncoder; 32 | 33 | let data = b"This is some test data"; 34 | let mut encoder = ZlibEncoder::new(Vec::new(), Compression::Default); 35 | encoder.write_all(data).unwrap(); 36 | let compressed_data = encoder.finish().unwrap(); 37 | ``` 38 | 39 | # Other deflate/zlib Rust projects from various people 40 | * [flate2](https://github.com/rust-lang/flate2-rs) FLATE, Gzip, and Zlib bindings for Rust - can use miniz_oxide for a pure Rust implementation. 41 | * [Zopfli in Rust](https://github.com/carols10cents/zopfli) Rust port of zopfli 42 | * [inflate](https://github.com/image-rs/inflate) DEFLATE decoder implemented in Rust 43 | * [miniz-oxide](https://github.com/Frommi/miniz_oxide) Port of miniz to Rust. 44 | * [libflate](https://github.com/sile/libflate) Another DEFLATE/Zlib/Gzip encoder and decoder written in Rust. (Only does some very light compression). 45 | 46 | # License 47 | deflate is distributed under the terms of both the MIT and Apache 2.0 licences. 48 | 49 | bitstream.rs is © @nwin and was released under both MIT and Apache 2.0 50 | 51 | Some code in length_encode.rs has been ported from the `miniz` library, which is public domain. 
52 | 53 | The test data (tests/pg11.txt) is borrowed from [Project Gutenberg](https://www.gutenberg.org/ebooks/11) and is available under public domain, or the Project Gutenberg Licence 54 | -------------------------------------------------------------------------------- /benches/bench.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate deflate; 4 | extern crate flate2; 5 | extern crate test; 6 | 7 | use std::io; 8 | use std::io::Write; 9 | 10 | use deflate::{deflate_bytes_zlib, deflate_bytes_zlib_conf, CompressionOptions}; 11 | use flate2::write; 12 | use flate2::Compression; 13 | use test::Bencher; 14 | 15 | fn load_from_file(name: &str) -> Vec { 16 | use std::fs::File; 17 | use std::io::Read; 18 | let mut input = Vec::new(); 19 | let mut f = File::open(name).unwrap(); 20 | 21 | f.read_to_end(&mut input).unwrap(); 22 | input 23 | } 24 | 25 | fn get_test_data() -> Vec { 26 | use std::env; 27 | let path = env::var("TEST_FILE").unwrap_or_else(|_| "tests/pg11.txt".to_string()); 28 | load_from_file(&path) 29 | } 30 | 31 | #[bench] 32 | fn test_file_zlib_def(b: &mut Bencher) { 33 | let test_data = get_test_data(); 34 | 35 | b.iter(|| deflate_bytes_zlib(&test_data)); 36 | } 37 | 38 | #[bench] 39 | fn test_file_zlib_best(b: &mut Bencher) { 40 | let test_data = get_test_data(); 41 | 42 | b.iter(|| deflate_bytes_zlib_conf(&test_data, CompressionOptions::high())); 43 | } 44 | 45 | #[bench] 46 | fn test_file_zlib_fast(b: &mut Bencher) { 47 | let test_data = get_test_data(); 48 | 49 | b.iter(|| deflate_bytes_zlib_conf(&test_data, CompressionOptions::fast())); 50 | } 51 | 52 | #[bench] 53 | fn test_file_zlib_rle(b: &mut Bencher) { 54 | let test_data = get_test_data(); 55 | 56 | b.iter(|| deflate_bytes_zlib_conf(&test_data, CompressionOptions::rle())); 57 | } 58 | 59 | fn deflate_bytes_flate2_zlib(level: Compression, input: &[u8]) -> Vec { 60 | use flate2::write::ZlibEncoder; 61 | 62 | let mut e = 
ZlibEncoder::new(Vec::with_capacity(input.len() / 3), level); 63 | e.write_all(input).unwrap(); 64 | e.finish().unwrap() 65 | } 66 | 67 | #[bench] 68 | fn test_file_zlib_flate2_def(b: &mut Bencher) { 69 | let test_data = get_test_data(); 70 | b.iter(|| deflate_bytes_flate2_zlib(Compression::default(), &test_data)); 71 | } 72 | 73 | #[bench] 74 | fn test_file_zlib_flate2_best(b: &mut Bencher) { 75 | let test_data = get_test_data(); 76 | b.iter(|| deflate_bytes_flate2_zlib(Compression::best(), &test_data)); 77 | } 78 | 79 | #[bench] 80 | fn test_file_zlib_flate2_fast(b: &mut Bencher) { 81 | let test_data = get_test_data(); 82 | b.iter(|| deflate_bytes_flate2_zlib(Compression::fast(), &test_data)); 83 | } 84 | 85 | #[derive(Copy, Clone)] 86 | struct Dummy {} 87 | 88 | impl Write for Dummy { 89 | fn write(&mut self, buf: &[u8]) -> io::Result { 90 | Ok(buf.len()) 91 | } 92 | 93 | fn flush(&mut self) -> io::Result<()> { 94 | Ok(()) 95 | } 96 | } 97 | 98 | #[bench] 99 | fn writer_create(b: &mut Bencher) { 100 | use deflate::write::DeflateEncoder; 101 | b.iter(|| DeflateEncoder::new(Dummy {}, CompressionOptions::fast())); 102 | } 103 | 104 | #[bench] 105 | fn writer_create_flate2(b: &mut Bencher) { 106 | b.iter(|| write::DeflateEncoder::new(Dummy {}, Compression::fast())); 107 | } 108 | -------------------------------------------------------------------------------- /changelog.md: -------------------------------------------------------------------------------- 1 | 2 | ## 1.0.0 (2021-11-10) 3 | 4 | * This release does not add any new features or change the API, only some minor refactor and doc fixes. It's mainly for indicating crate stability. 
5 | 6 | #### Bug Fixes 7 | 8 | * fix warnings from rustc and clippy, update dependencies ([a5fcb05e](https://github.com/image-rs/deflate-rs/commit/a5fcb05e74cf6529ebb98b46701f666f1039c5e3)) 9 | * Fix test data location and capitalize Rust, update various links, see pr #53 (thanks @atouchet) ([34fecd18](https://github.com/image-rs/deflate-rs/commit/34fecd18cafaf40ea71abfdcbf054db90e7ce5fa)) 10 | 11 | 12 | ### 0.9.1 (2021-03-24) 13 | 14 | #### Bug Fixes 15 | 16 | * Fix gzip feature that was broken in 0.9 (thanks @oheralla) ([49ac5cfe](https://github.com/image-rs/deflate-rs/commit/49ac5cfec5e1a6c4398a8753309e1f7d66108c41)) 17 | 18 | 19 | 20 | ## 0.9.0 (2021-01-21) 21 | 22 | #### Bug Fixes 23 | 24 | * Use std functions instead of byteorder (bumps minimum version to 1.32.0 ([d217fbd9](https://github.com/image-rs/deflate-rs/commit/d217fbd956597706d80efc1de93c65f4fbe957fd)) 25 | 26 | 27 | ### 0.8.6 (2020-07-06) 28 | 29 | 30 | #### Bug Fixes 31 | 32 | * try to fix issues with sync flush behaviour ([6c97e514](https://github.com/image-rs/deflate-rs/commit/6c97e5143df139af578cdd884e0dee9940414ea1), closes [#48](https://github.com/image-rs/deflate-rs/issues/48)) 33 | * add #!forbid(unsafe_code) to crate root ([fcbe4206](https://github.com/image-rs/deflate-rs/commit/fcbe4206c45cf55d80ae8feb94f0613fe795659f)) 34 | 35 | 36 | 37 | 38 | ### 0.8.5 (2020-07-04) 39 | 40 | 41 | #### Bug Fixes 42 | 43 | * Avoid infinitely looping on sync flush with short buffer writers ([99a1a75f](99a1a75f), closes [#47](https://github.com/image-rs/deflate-rs/issues/47)) 44 | * Remove unsafe in write_length_rle ([77227c8b](77227c8b), closes [#46](https://github.com/image-rs/deflate-rs/issues/46)) 45 | 46 | 47 | 48 | 49 | ### 0.8.4 (2020-04-04) 50 | 51 | 52 | #### Bug Fixes 53 | 54 | * Fix block size counter bug [#44](https://github.com/image-rs/deflate-rs/issues/44) (probably introduced in 1b70be) 55 | that triggered a debug assertion and that could possibly in theory cause stored block to start at the 
wrong input position at a block split with low entropy data followed by uncompressible data. 56 | -------------------------------------------------------------------------------- /deflate-fuzz-target/.gitignore: -------------------------------------------------------------------------------- 1 | out 2 | target 3 | -------------------------------------------------------------------------------- /deflate-fuzz-target/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | [[package]] 4 | name = "adler32" 5 | version = "1.0.4" 6 | source = "registry+https://github.com/rust-lang/crates.io-index" 7 | 8 | [[package]] 9 | name = "afl" 10 | version = "0.5.1" 11 | source = "registry+https://github.com/rust-lang/crates.io-index" 12 | dependencies = [ 13 | "cc 1.0.47 (registry+https://github.com/rust-lang/crates.io-index)", 14 | "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", 15 | "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", 16 | "xdg 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)", 17 | ] 18 | 19 | [[package]] 20 | name = "ansi_term" 21 | version = "0.11.0" 22 | source = "registry+https://github.com/rust-lang/crates.io-index" 23 | dependencies = [ 24 | "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", 25 | ] 26 | 27 | [[package]] 28 | name = "atty" 29 | version = "0.2.13" 30 | source = "registry+https://github.com/rust-lang/crates.io-index" 31 | dependencies = [ 32 | "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", 33 | "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", 34 | ] 35 | 36 | [[package]] 37 | name = "bitflags" 38 | version = "1.2.1" 39 | source = "registry+https://github.com/rust-lang/crates.io-index" 40 | 41 | [[package]] 42 | name = "byteorder" 43 | version = "1.3.2" 44 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 45 | 46 | [[package]] 47 | name = "cc" 48 | version = "1.0.47" 49 | source = "registry+https://github.com/rust-lang/crates.io-index" 50 | 51 | [[package]] 52 | name = "clap" 53 | version = "2.33.0" 54 | source = "registry+https://github.com/rust-lang/crates.io-index" 55 | dependencies = [ 56 | "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", 57 | "atty 0.2.13 (registry+https://github.com/rust-lang/crates.io-index)", 58 | "bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", 59 | "strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", 60 | "textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", 61 | "unicode-width 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", 62 | "vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", 63 | ] 64 | 65 | [[package]] 66 | name = "deflate" 67 | version = "0.8.2" 68 | dependencies = [ 69 | "adler32 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", 70 | "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", 71 | ] 72 | 73 | [[package]] 74 | name = "deflate-fuzz-target" 75 | version = "0.1.0" 76 | dependencies = [ 77 | "afl 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", 78 | "deflate 0.8.2", 79 | "miniz_oxide 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", 80 | ] 81 | 82 | [[package]] 83 | name = "libc" 84 | version = "0.2.65" 85 | source = "registry+https://github.com/rust-lang/crates.io-index" 86 | 87 | [[package]] 88 | name = "miniz_oxide" 89 | version = "0.3.5" 90 | source = "registry+https://github.com/rust-lang/crates.io-index" 91 | dependencies = [ 92 | "adler32 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", 93 | ] 94 | 95 | [[package]] 96 | name = "rustc_version" 97 | version = "0.2.3" 98 | source = "registry+https://github.com/rust-lang/crates.io-index" 99 | dependencies = [ 100 
| "semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", 101 | ] 102 | 103 | [[package]] 104 | name = "semver" 105 | version = "0.9.0" 106 | source = "registry+https://github.com/rust-lang/crates.io-index" 107 | dependencies = [ 108 | "semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", 109 | ] 110 | 111 | [[package]] 112 | name = "semver-parser" 113 | version = "0.7.0" 114 | source = "registry+https://github.com/rust-lang/crates.io-index" 115 | 116 | [[package]] 117 | name = "strsim" 118 | version = "0.8.0" 119 | source = "registry+https://github.com/rust-lang/crates.io-index" 120 | 121 | [[package]] 122 | name = "textwrap" 123 | version = "0.11.0" 124 | source = "registry+https://github.com/rust-lang/crates.io-index" 125 | dependencies = [ 126 | "unicode-width 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", 127 | ] 128 | 129 | [[package]] 130 | name = "unicode-width" 131 | version = "0.1.6" 132 | source = "registry+https://github.com/rust-lang/crates.io-index" 133 | 134 | [[package]] 135 | name = "vec_map" 136 | version = "0.8.1" 137 | source = "registry+https://github.com/rust-lang/crates.io-index" 138 | 139 | [[package]] 140 | name = "winapi" 141 | version = "0.3.8" 142 | source = "registry+https://github.com/rust-lang/crates.io-index" 143 | dependencies = [ 144 | "winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", 145 | "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", 146 | ] 147 | 148 | [[package]] 149 | name = "winapi-i686-pc-windows-gnu" 150 | version = "0.4.0" 151 | source = "registry+https://github.com/rust-lang/crates.io-index" 152 | 153 | [[package]] 154 | name = "winapi-x86_64-pc-windows-gnu" 155 | version = "0.4.0" 156 | source = "registry+https://github.com/rust-lang/crates.io-index" 157 | 158 | [[package]] 159 | name = "xdg" 160 | version = "2.2.0" 161 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 162 | 163 | [metadata] 164 | "checksum adler32 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2" 165 | "checksum afl 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e27ab52ba6f9254a0e6e48481e07fca75dd32a3bd2c8bfd407beb4f308521938" 166 | "checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" 167 | "checksum atty 0.2.13 (registry+https://github.com/rust-lang/crates.io-index)" = "1803c647a3ec87095e7ae7acfca019e98de5ec9a7d01343f611cf3152ed71a90" 168 | "checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" 169 | "checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" 170 | "checksum cc 1.0.47 (registry+https://github.com/rust-lang/crates.io-index)" = "aa87058dce70a3ff5621797f1506cb837edd02ac4c0ae642b4542dce802908b8" 171 | "checksum clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9" 172 | "checksum libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)" = "1a31a0627fdf1f6a39ec0dd577e101440b7db22672c0901fe00a9a6fbb5c24e8" 173 | "checksum miniz_oxide 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6f3f74f726ae935c3f514300cc6773a0c9492abc5e972d42ba0c0ebb88757625" 174 | "checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" 175 | "checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" 176 | "checksum semver-parser 0.7.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" 177 | "checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" 178 | "checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" 179 | "checksum unicode-width 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7007dbd421b92cc6e28410fe7362e2e0a2503394908f417b68ec8d1c364c4e20" 180 | "checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" 181 | "checksum winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6" 182 | "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 183 | "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 184 | "checksum xdg 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d089681aa106a86fade1b0128fb5daf07d5867a509ab036d99988dec80429a57" 185 | -------------------------------------------------------------------------------- /deflate-fuzz-target/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "deflate-fuzz-target" 3 | version = "0.1.0" 4 | authors = ["Andrew Champion "] 5 | edition = "2018" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | afl = "0.5" 11 | deflate = { path = "../" } 12 | miniz_oxide = "0.3" 13 | 
-------------------------------------------------------------------------------- /deflate-fuzz-target/in/dump.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/deflate-fuzz-target/in/dump.bin -------------------------------------------------------------------------------- /deflate-fuzz-target/in/issue_18_201911.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/deflate-fuzz-target/in/issue_18_201911.bin -------------------------------------------------------------------------------- /deflate-fuzz-target/in/short.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/deflate-fuzz-target/in/short.bin -------------------------------------------------------------------------------- /deflate-fuzz-target/src/main.rs: -------------------------------------------------------------------------------- 1 | use afl::fuzz; 2 | use deflate::CompressionOptions; 3 | 4 | fn roundtrip(data: &[u8]) { 5 | roundtrip_conf(data, CompressionOptions::default()); 6 | roundtrip_conf(data, CompressionOptions::fast()); 7 | } 8 | 9 | fn roundtrip_conf(data: &[u8], level: CompressionOptions) { 10 | let compressed = deflate::deflate_bytes_zlib_conf(data, level); 11 | let decompressed = 12 | miniz_oxide::inflate::decompress_to_vec_zlib(&compressed).expect("Decompression failed!"); 13 | assert!(decompressed.as_slice() == data); 14 | } 15 | 16 | fn main() { 17 | fuzz!(|data: &[u8]| { 18 | roundtrip(data) 19 | }); 20 | } 21 | 22 | -------------------------------------------------------------------------------- /src/bit_reverse.rs: -------------------------------------------------------------------------------- 1 | /// 
/// Reverse the first `length` bits of `n`.
///
/// `length` is assumed to be in `1..=16`; any other value will panic from
/// shift overflow in debug builds or produce garbage in release builds,
/// matching the behaviour of the previous hand-rolled implementation.
pub fn reverse_bits(n: u16, length: u8) -> u16 {
    // Use the standard library intrinsic instead of the hand-rolled
    // swap-halves trick (http://aggregate.org/MAGIC/#Bit%20Reversal);
    // it compiles to a single instruction on targets that have one.
    // Reverse all 16 bits, then shift the interesting ones down.
    n.reverse_bits() >> (16 - length)
}

#[cfg(test)]
mod test {
    use super::reverse_bits;
    #[test]
    fn test_bit_reverse() {
        assert_eq!(reverse_bits(0b0111_0100, 8), 0b0010_1110);
        assert_eq!(
            reverse_bits(0b1100_1100_1100_1100, 16),
            0b0011_0011_0011_0011
        );
    }
}
// This was originally based on code from: https://github.com/nwin/lzw
// Copyright (c) 2015 nwin
// which is under both Apache 2.0 and MIT

//! This module provides a bit writer
use std::io::{self, Write};

#[cfg(target_pointer_width = "64")]
#[macro_use]
mod arch_dep {
    /// The data type of the accumulator.
    /// a 64-bit value allows us to store more before
    /// each push to the vector, but is sub-optimal
    /// on 32-bit platforms.
    pub type AccType = u64;
    pub const FLUSH_AT: u8 = 48;
    /// Push pending bits to vector.
    /// Using a macro here since an inline function
    /// didn't optimise properly.
    /// TODO June 2019: See if it's still needed.
    macro_rules! push {
        ($s:ident) => {
            $s.w.extend_from_slice(
                &[
                    $s.acc as u8,
                    ($s.acc >> 8) as u8,
                    ($s.acc >> 16) as u8,
                    ($s.acc >> 24) as u8,
                    ($s.acc >> 32) as u8,
                    ($s.acc >> 40) as u8,
                ][..],
            )
        };
    }
}
#[cfg(not(target_pointer_width = "64"))]
#[macro_use]
mod arch_dep {
    pub type AccType = u32;
    pub const FLUSH_AT: u8 = 16;
    macro_rules! push {
        ($s:ident) => {
            // Unlike the 64-bit case, using copy_from_slice seemed to worsen performance here.
            // TODO: Needs benching on a 32-bit system to see what works best.
            $s.w.push($s.acc as u8);
            $s.w.push(($s.acc >> 8) as u8);
        };
    }
}

use self::arch_dep::*;

/// Writes bits to a byte stream, LSB first.
pub struct LsbWriter {
    // Public for now so it can be replaced after initialization.
    pub w: Vec<u8>,
    // Number of bits currently buffered in `acc`.
    bits: u8,
    // Accumulator for pending bits.
    acc: AccType,
}

impl LsbWriter {
    /// Creates a new bit writer writing into `writer`.
    pub const fn new(writer: Vec<u8>) -> LsbWriter {
        LsbWriter {
            w: writer,
            bits: 0,
            acc: 0,
        }
    }

    /// Number of bits buffered but not yet pushed to the output vector.
    pub const fn pending_bits(&self) -> u8 {
        self.bits
    }

    /// Buffer n number of bits, and write them to the vec if there are enough pending bits.
    pub fn write_bits(&mut self, v: u16, n: u8) {
        // NOTE: This outputs garbage data if n is 0, but v is not 0
        self.acc |= (AccType::from(v)) << self.bits;
        self.bits += n;
        // Waiting until we have FLUSH_AT bits and pushing them all in one batch.
        while self.bits >= FLUSH_AT {
            push!(self);
            self.acc >>= FLUSH_AT;
            self.bits -= FLUSH_AT;
        }
    }

    /// Pad with zero bits and push any remaining whole bytes to the output.
    fn write_bits_finish(&mut self, v: u16, n: u8) {
        // NOTE: This outputs garbage data if n is 0, but v is not 0
        self.acc |= (AccType::from(v)) << self.bits;
        self.bits += n % 8;
        while self.bits >= 8 {
            self.w.push(self.acc as u8);
            self.acc >>= 8;
            self.bits -= 8;
        }
    }

    /// Flush the accumulator, padding the final byte with zero bits.
    pub fn flush_raw(&mut self) {
        let missing = FLUSH_AT - self.bits;
        // Have to test for self.bits > 0 here,
        // otherwise flush would output an extra byte when flush was called at a byte boundary
        if missing > 0 && self.bits > 0 {
            self.write_bits_finish(0, missing);
        }
    }
}

impl Write for LsbWriter {
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        if self.acc == 0 {
            // NOTE(review): this fast path assumes no bits are pending
            // whenever `acc == 0`; if zero-valued bits have been buffered
            // (`bits > 0`, `acc == 0`) the bytes would be appended
            // unshifted — confirm callers never hit that state.
            self.w.extend_from_slice(buf)
        } else {
            for &byte in buf.iter() {
                self.write_bits(u16::from(byte), 8)
            }
        }
        Ok(buf.len())
    }

    fn flush(&mut self) -> io::Result<()> {
        self.flush_raw();
        Ok(())
    }
}

#[cfg(test)]
mod test {
    use super::LsbWriter;

    #[test]
    fn write_bits() {
        let input = [
            (3, 3),
            (10, 8),
            (88, 7),
            (0, 2),
            (0, 5),
            (0, 0),
            (238, 8),
            (126, 8),
            (161, 8),
            (10, 8),
            (238, 8),
            (174, 8),
            (126, 8),
            (174, 8),
            (65, 8),
            (142, 8),
            (62, 8),
            (10, 8),
            (1, 8),
            (161, 8),
            (78, 8),
            (62, 8),
            (158, 8),
            (206, 8),
            (10, 8),
            (64, 7),
            (0, 0),
            (24, 5),
            (0, 0),
            (174, 8),
            (126, 8),
            (193, 8),
            (174, 8),
        ];
        let expected = [
            83, 192, 2, 220, 253, 66, 21, 220, 93, 253, 92, 131, 28, 125, 20, 2, 66, 157, 124, 60,
            157, 21, 128, 216, 213, 47, 216, 21,
        ];
        let mut writer = LsbWriter::new(Vec::new());
        for v in input.iter() {
            writer.write_bits(v.0, v.1);
        }
        writer.flush_raw();
        assert_eq!(writer.w, expected);
    }
}

#[cfg(all(test, feature = "benchmarks"))]
mod bench {
    use super::LsbWriter;
    use test_std::Bencher;
    #[bench]
    fn bit_writer(b: &mut Bencher) {
        let input = [
            (3, 3),
            (10, 8),
            (88, 7),
            (0, 2),
            (0, 5),
            (0, 0),
            (238, 8),
            (126, 8),
            (161, 8),
            (10, 8),
            (238, 8),
            (174, 8),
            (126, 8),
            (174, 8),
            (65, 8),
            (142, 8),
            (62, 8),
            (10, 8),
            (1, 8),
            (161, 8),
            (78, 8),
            (62, 8),
            (158, 8),
            (206, 8),
            (10, 8),
            (64, 7),
            (0, 0),
            (24, 5),
            (0, 0),
            (174, 8),
            (126, 8),
            (193, 8),
            (174, 8),
        ];
        let mut writer = LsbWriter::new(Vec::with_capacity(100));
        b.iter(|| {
            for v in input.iter() {
                let _ = writer.write_bits(v.0, v.1);
            }
        });
    }
}
pub const WINDOW_SIZE: usize = 32768;
pub const WINDOW_MASK: usize = WINDOW_SIZE - 1;
#[cfg(test)]
pub const HASH_BYTES: usize = 3;
const HASH_SHIFT: u16 = 5;
const HASH_MASK: u16 = WINDOW_MASK as u16;

/// Helper struct to let us allocate both head and prev in the same block.
struct Tables {
    /// Starts of hash chains (in prev)
    pub head: [u16; WINDOW_SIZE],
    /// Link to previous occurence of this hash value
    pub prev: [u16; WINDOW_SIZE],
}

impl Default for Tables {
    #[inline]
    fn default() -> Tables {
        Tables {
            head: [0; WINDOW_SIZE],
            prev: [0; WINDOW_SIZE],
        }
    }
}

impl Tables {
    /// Copy `head` into `prev` in place.
    #[inline]
    fn fill_prev(&mut self) {
        self.prev.copy_from_slice(&self.head);
    }
}

/// Create and box the hash chains.
fn create_tables() -> Box<Tables> {
    // Using default here is a trick to get around the lack of box syntax on stable Rust.
    //
    // Box::new([0u16,n]) ends up creating an temporary array on the stack which is not optimised
    // but using default, which simply calls `box value` internally allows us to get around this.
    //
    // We could use vec instead, but using a boxed array helps the compiler optimise
    // away bounds checks as `n & WINDOW_MASK < WINDOW_SIZE` will always be true.
    let mut t: Box<Tables> = Box::default();

    for (n, b) in t.head.iter_mut().enumerate() {
        *b = n as u16;
    }

    t.fill_prev();

    t
}

/// Returns a new hash value based on the previous value and the next byte
#[inline]
pub fn update_hash(current_hash: u16, to_insert: u8) -> u16 {
    update_hash_conf(current_hash, to_insert, HASH_SHIFT, HASH_MASK)
}

#[inline]
fn update_hash_conf(current_hash: u16, to_insert: u8, shift: u16, mask: u16) -> u16 {
    ((current_hash << shift) ^ (u16::from(to_insert))) & mask
}

/// Reset an array to the identity mapping (each slot points at itself).
#[inline]
fn reset_array(arr: &mut [u16; WINDOW_SIZE]) {
    for (n, b) in arr.iter_mut().enumerate() {
        *b = n as u16;
    }
}

pub struct ChainedHashTable {
    // Current running hash value of the last 3 bytes
    current_hash: u16,
    // Hash chains.
    c: Box<Tables>,
    // Used for testing
    // count: DebugCounter,
}

impl ChainedHashTable {
    pub fn new() -> ChainedHashTable {
        ChainedHashTable {
            current_hash: 0,
            c: create_tables(),
            //count: DebugCounter::default(),
        }
    }

    #[cfg(test)]
    pub fn from_starting_values(v1: u8, v2: u8) -> ChainedHashTable {
        let mut t = ChainedHashTable::new();
        t.current_hash = update_hash(t.current_hash, v1);
        t.current_hash = update_hash(t.current_hash, v2);
        t
    }

    /// Resets the hash value and hash chains
    pub fn reset(&mut self) {
        self.current_hash = 0;
        reset_array(&mut self.c.head);
        // BUGFIX: the previous implementation did
        //   `let h = self.c.head; let mut c = self.c.prev;
        //    c[..].copy_from_slice(&h[..]);`
        // Since `[u16; WINDOW_SIZE]` is `Copy`, both `let`s copied the
        // arrays out of the box onto the stack and `copy_from_slice`
        // mutated the temporary copy, leaving `prev` untouched.
        // Copy the freshly reset `head` into `prev` in place instead.
        self.c.fill_prev();
        /*if cfg!(debug_assertions) {
            self.count.reset();
        }*/
    }

    pub fn add_initial_hash_values(&mut self, v1: u8, v2: u8) {
        self.current_hash = update_hash(self.current_hash, v1);
        self.current_hash = update_hash(self.current_hash, v2);
    }

    /// Insert a byte into the hash table
    #[inline]
    pub fn add_hash_value(&mut self, position: usize, value: u8) {
        // Check that all bytes are input in order and at the correct positions.
        // Disabled for now as it breaks when sync flushing.
        /*debug_assert_eq!(
            position & WINDOW_MASK,
            self.count.get() as usize & WINDOW_MASK
        );*/
        debug_assert!(
            position < WINDOW_SIZE * 2,
            "Position is larger than 2 * window size! {}",
            position
        );
        // Storing the hash in a temporary variable here makes the compiler avoid the
        // bounds checks in this function.
        let new_hash = update_hash(self.current_hash, value);

        self.add_with_hash(position, new_hash);

        // Update the stored hash value with the new hash.
        self.current_hash = new_hash;
    }

    /// Directly set the current hash value
    #[inline]
    pub fn set_hash(&mut self, hash: u16) {
        self.current_hash = hash;
    }

    /// Update the tables directly, providing the hash.
    #[inline]
    pub fn add_with_hash(&mut self, position: usize, hash: u16) {
        /*if cfg!(debug_assertions) {
            self.count.add(1);
        }*/

        self.c.prev[position & WINDOW_MASK] = self.c.head[hash as usize];

        // Ignoring any bits over 16 here is deliberate, as we only concern ourselves about
        // where in the buffer (which is 64k bytes) we are referring to.
        self.c.head[hash as usize] = position as u16;
    }

    // Get the head of the hash chain for the current hash value
    #[cfg(test)]
    #[inline]
    pub const fn current_head(&self) -> u16 {
        self.c.head[self.current_hash as usize]
    }

    #[inline]
    pub const fn current_hash(&self) -> u16 {
        self.current_hash
    }

    #[inline]
    pub const fn get_prev(&self, bytes: usize) -> u16 {
        self.c.prev[bytes & WINDOW_MASK]
    }

    #[cfg(test)]
    #[inline]
    pub fn farthest_next(&self, match_pos: usize, match_len: usize) -> usize {
        let to_check = match_len.saturating_sub(2);

        let mut n = 0;
        let mut smallest_prev = self.get_prev(match_pos);
        let mut smallest_pos = 0;
        while n < to_check {
            let prev = self.get_prev(match_pos + n);
            if prev < smallest_prev {
                smallest_prev = prev;
                smallest_pos = n;
            }
            n += 1;
        }
        smallest_pos
    }

    /// Rebase a chain entry after the window slid by `bytes`;
    /// entries that would underflow are reset to their own position.
    #[inline]
    fn slide_value(b: u16, pos: u16, bytes: u16) -> u16 {
        if b >= bytes {
            b - bytes
        } else {
            pos
        }
    }

    #[inline]
    fn slide_table(table: &mut [u16; WINDOW_SIZE], bytes: u16) {
        for (n, b) in table.iter_mut().enumerate() {
            *b = ChainedHashTable::slide_value(*b, n as u16, bytes);
        }
    }

    /// Slide the hash chains after the input window has advanced by `bytes`.
    pub fn slide(&mut self, bytes: usize) {
        /*if cfg!(debug_assertions) && bytes != WINDOW_SIZE {
            // This should only happen in tests in this file.
            self.count.reset();
        }*/
        ChainedHashTable::slide_table(&mut self.c.head, bytes as u16);
        ChainedHashTable::slide_table(&mut self.c.prev, bytes as u16);
    }
}

#[cfg(test)]
pub fn filled_hash_table(data: &[u8]) -> ChainedHashTable {
    assert!(data.len() <= (WINDOW_SIZE * 2) + 2);
    let mut hash_table = ChainedHashTable::from_starting_values(data[0], data[1]);
    for (n, b) in data[2..].iter().enumerate() {
        hash_table.add_hash_value(n, *b);
    }
    hash_table
}
/// Ensure that the initial hash values are correct. 342 | fn initial_chains() { 343 | let t = ChainedHashTable::new(); 344 | for (n, &b) in t.c.head.iter().enumerate() { 345 | assert_eq!(n, b as usize); 346 | } 347 | for (n, &b) in t.c.prev.iter().enumerate() { 348 | assert_eq!(n, b as usize); 349 | } 350 | } 351 | } 352 | -------------------------------------------------------------------------------- /src/checksum.rs: -------------------------------------------------------------------------------- 1 | use adler32::RollingAdler32; 2 | 3 | pub trait RollingChecksum { 4 | fn update(&mut self, byte: u8); 5 | fn update_from_slice(&mut self, data: &[u8]); 6 | fn current_hash(&self) -> u32; 7 | } 8 | 9 | pub struct NoChecksum {} 10 | 11 | impl NoChecksum { 12 | pub const fn new() -> NoChecksum { 13 | NoChecksum {} 14 | } 15 | } 16 | 17 | impl RollingChecksum for NoChecksum { 18 | fn update(&mut self, _: u8) {} 19 | fn update_from_slice(&mut self, _: &[u8]) {} 20 | fn current_hash(&self) -> u32 { 21 | 1 22 | } 23 | } 24 | 25 | impl<'a> RollingChecksum for &'a mut NoChecksum { 26 | fn update(&mut self, _: u8) {} 27 | fn update_from_slice(&mut self, _: &[u8]) {} 28 | fn current_hash(&self) -> u32 { 29 | 1 30 | } 31 | } 32 | 33 | pub struct Adler32Checksum { 34 | adler32: RollingAdler32, 35 | } 36 | 37 | impl Adler32Checksum { 38 | pub fn new() -> Adler32Checksum { 39 | Adler32Checksum { 40 | adler32: RollingAdler32::new(), 41 | } 42 | } 43 | } 44 | 45 | impl RollingChecksum for Adler32Checksum { 46 | fn update(&mut self, byte: u8) { 47 | self.adler32.update(byte); 48 | } 49 | 50 | fn update_from_slice(&mut self, data: &[u8]) { 51 | self.adler32.update_buffer(data); 52 | } 53 | 54 | fn current_hash(&self) -> u32 { 55 | self.adler32.hash() 56 | } 57 | } 58 | 59 | impl<'a> RollingChecksum for &'a mut Adler32Checksum { 60 | fn update(&mut self, byte: u8) { 61 | self.adler32.update(byte); 62 | } 63 | 64 | fn update_from_slice(&mut self, data: &[u8]) { 65 | 
self.adler32.update_buffer(data); 66 | } 67 | 68 | fn current_hash(&self) -> u32 { 69 | self.adler32.hash() 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/compress.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::io::Write; 3 | 4 | use crate::bitstream::LsbWriter; 5 | use crate::deflate_state::DeflateState; 6 | use crate::encoder_state::EncoderState; 7 | use crate::huffman_lengths::{gen_huffman_lengths, write_huffman_lengths, BlockType}; 8 | use crate::lz77::{lz77_compress_block, LZ77Status}; 9 | use crate::lzvalue::LZValue; 10 | use crate::stored_block::{compress_block_stored, write_stored_header, MAX_STORED_BLOCK_LENGTH}; 11 | 12 | const LARGEST_OUTPUT_BUF_SIZE: usize = 1024 * 32; 13 | 14 | /// Flush mode to use when compressing input received in multiple steps. 15 | /// 16 | /// (The more obscure ZLIB flush modes are not implemented.) 17 | #[derive(Eq, PartialEq, Debug, Copy, Clone)] 18 | pub enum Flush { 19 | // Simply wait for more input when we are out of input data to process. 20 | None, 21 | // Send a "sync block", corresponding to Z_SYNC_FLUSH in zlib. This finishes compressing and 22 | // outputting all pending data, and then outputs an empty stored block. 23 | // (That is, the block header indicating a stored block followed by `0000FFFF`). 24 | Sync, 25 | _Partial, 26 | _Block, 27 | _Full, 28 | // Finish compressing and output all remaining input. 29 | Finish, 30 | } 31 | 32 | /// Write all the lz77 encoded data in the buffer using the specified `EncoderState`, and finish 33 | /// with the end of block code. 34 | pub fn flush_to_bitstream(buffer: &[LZValue], state: &mut EncoderState) { 35 | for &b in buffer { 36 | state.write_lzvalue(b.value()); 37 | } 38 | state.write_end_of_block() 39 | } 40 | 41 | /// Compress the input data using only fixed huffman codes. 42 | /// 43 | /// Currently only used in tests. 
44 | #[cfg(test)] 45 | pub fn compress_data_fixed(input: &[u8]) -> Vec { 46 | use crate::lz77::lz77_compress; 47 | 48 | let mut state = EncoderState::fixed(Vec::new()); 49 | let compressed = lz77_compress(input).unwrap(); 50 | 51 | // We currently don't split blocks here(this function is just used for tests anyhow) 52 | state.write_start_of_block(true, true); 53 | flush_to_bitstream(&compressed, &mut state); 54 | 55 | state.flush(); 56 | state.reset(Vec::new()) 57 | } 58 | 59 | fn write_stored_block(input: &[u8], mut writer: &mut LsbWriter, final_block: bool) { 60 | // If the input is not zero, we write stored blocks for the input data. 61 | if !input.is_empty() { 62 | let mut i = input.chunks(MAX_STORED_BLOCK_LENGTH).peekable(); 63 | 64 | while let Some(chunk) = i.next() { 65 | let last_chunk = i.peek().is_none(); 66 | // Write the block header 67 | write_stored_header(writer, final_block && last_chunk); 68 | 69 | // Write the actual data. 70 | compress_block_stored(chunk, &mut writer).expect("Write error"); 71 | } 72 | } else { 73 | // If the input length is zero, we output an empty block. This is used for syncing. 74 | write_stored_header(writer, final_block); 75 | compress_block_stored(&[], &mut writer).expect("Write error"); 76 | } 77 | } 78 | 79 | /// Inner compression function used by both the writers and the simple compression functions. 80 | pub fn compress_data_dynamic_n( 81 | input: &[u8], 82 | deflate_state: &mut DeflateState, 83 | flush: Flush, 84 | ) -> io::Result { 85 | let mut bytes_written = 0; 86 | 87 | let mut slice = input; 88 | 89 | // enter the decompression loop unless we did a sync flush, in case we want to make sure 90 | // everything is output before continuing. 91 | while !deflate_state.needs_flush { 92 | let output_buf_len = deflate_state.output_buf().len(); 93 | let output_buf_pos = deflate_state.output_buf_pos; 94 | // If the output buffer has too much data in it already, flush it before doing anything 95 | // else. 
96 | if output_buf_len > LARGEST_OUTPUT_BUF_SIZE { 97 | let written = deflate_state 98 | .inner 99 | .as_mut() 100 | .expect("Missing writer!") 101 | .write(&deflate_state.encoder_state.inner_vec()[output_buf_pos..])?; 102 | 103 | if written < output_buf_len.checked_sub(output_buf_pos).unwrap() { 104 | // Only some of the data was flushed, so keep track of where we were. 105 | deflate_state.output_buf_pos += written; 106 | } else { 107 | // If we flushed all of the output, reset the output buffer. 108 | deflate_state.needs_flush = false; 109 | deflate_state.output_buf_pos = 0; 110 | deflate_state.output_buf().clear(); 111 | } 112 | 113 | if bytes_written == 0 { 114 | // If the buffer was already full when the function was called, this has to be 115 | // returned rather than Ok(0) to indicate that we didn't write anything, but are 116 | // not done yet. 117 | return Err(io::Error::new( 118 | io::ErrorKind::Interrupted, 119 | "Internal buffer full.", 120 | )); 121 | } else { 122 | return Ok(bytes_written); 123 | } 124 | } 125 | 126 | if deflate_state.lz77_state.is_last_block() { 127 | // The last block has already been written, so we don't have anything to compress. 128 | break; 129 | } 130 | 131 | let (written, status, position) = lz77_compress_block( 132 | slice, 133 | &mut deflate_state.lz77_state, 134 | &mut deflate_state.input_buffer, 135 | &mut deflate_state.lz77_writer, 136 | flush, 137 | ); 138 | 139 | // Bytes written in this call 140 | bytes_written += written; 141 | // Total bytes written since the compression process started 142 | // TODO: Should we realistically have to worry about overflowing here? 143 | deflate_state.bytes_written += written as u64; 144 | 145 | if status == LZ77Status::NeedInput { 146 | // If we've consumed all the data input so far, and we're not 147 | // finishing or syncing or ending the block here, simply return 148 | // the number of bytes consumed so far. 
149 | return Ok(bytes_written); 150 | } 151 | 152 | // Increment start of input data 153 | slice = &slice[written..]; 154 | 155 | // We need to check if this is the last block as the header will then be 156 | // slightly different to indicate this. 157 | let last_block = deflate_state.lz77_state.is_last_block(); 158 | 159 | let current_block_input_bytes = deflate_state.lz77_state.current_block_input_bytes(); 160 | 161 | if cfg!(debug_assertions) { 162 | deflate_state 163 | .bytes_written_control 164 | .add(current_block_input_bytes); 165 | } 166 | 167 | let partial_bits = deflate_state.encoder_state.writer.pending_bits(); 168 | 169 | let res = { 170 | let (l_freqs, d_freqs) = deflate_state.lz77_writer.get_frequencies(); 171 | let (l_lengths, d_lengths) = 172 | deflate_state.encoder_state.huffman_table.get_lengths_mut(); 173 | 174 | gen_huffman_lengths( 175 | l_freqs, 176 | d_freqs, 177 | current_block_input_bytes, 178 | partial_bits, 179 | l_lengths, 180 | d_lengths, 181 | &mut deflate_state.length_buffers, 182 | ) 183 | }; 184 | 185 | // Check if we've actually managed to compress the input, and output stored blocks 186 | // if not. 187 | match res { 188 | BlockType::Dynamic(header) => { 189 | // Write the block header. 190 | deflate_state 191 | .encoder_state 192 | .write_start_of_block(false, last_block); 193 | 194 | // Output the lengths of the huffman codes used in this block. 195 | write_huffman_lengths( 196 | &header, 197 | &deflate_state.encoder_state.huffman_table, 198 | &deflate_state.length_buffers.length_buf, 199 | &mut deflate_state.encoder_state.writer, 200 | ); 201 | 202 | // Uupdate the huffman codes that will be used to encode the 203 | // lz77-compressed data. 204 | deflate_state 205 | .encoder_state 206 | .huffman_table 207 | .update_from_lengths(); 208 | 209 | // Write the huffman compressed data and the end of block marker. 
210 | flush_to_bitstream( 211 | deflate_state.lz77_writer.get_buffer(), 212 | &mut deflate_state.encoder_state, 213 | ); 214 | } 215 | BlockType::Fixed => { 216 | // Write the block header for fixed code blocks. 217 | deflate_state 218 | .encoder_state 219 | .write_start_of_block(true, last_block); 220 | 221 | // Use the pre-defined static huffman codes. 222 | deflate_state.encoder_state.set_huffman_to_fixed(); 223 | 224 | // Write the compressed data and the end of block marker. 225 | flush_to_bitstream( 226 | deflate_state.lz77_writer.get_buffer(), 227 | &mut deflate_state.encoder_state, 228 | ); 229 | } 230 | BlockType::Stored => { 231 | // If compression fails, output a stored block instead. 232 | 233 | let start_pos = position.saturating_sub(current_block_input_bytes as usize); 234 | 235 | assert!( 236 | position >= current_block_input_bytes as usize, 237 | "Error! Trying to output a stored block with forgotten data!\ 238 | if you encounter this error, please file an issue!" 239 | ); 240 | 241 | write_stored_block( 242 | &deflate_state.input_buffer.get_buffer()[start_pos..position], 243 | &mut deflate_state.encoder_state.writer, 244 | flush == Flush::Finish && last_block, 245 | ); 246 | } 247 | }; 248 | 249 | // Clear the current lz77 data in the writer for the next call. 250 | deflate_state.lz77_writer.clear(); 251 | // We are done with the block, so we reset the number of bytes taken 252 | // for the next one. 253 | deflate_state.lz77_state.reset_input_bytes(); 254 | 255 | // We are done for now. 256 | if status == LZ77Status::Finished { 257 | // This flush mode means that there should be an empty stored block at the end. 258 | if flush == Flush::Sync { 259 | write_stored_block(&[], &mut deflate_state.encoder_state.writer, false); 260 | // Indicate that we need to flush the buffers before doing anything else. 
261 | deflate_state.needs_flush = true; 262 | } else if !deflate_state.lz77_state.is_last_block() { 263 | // Make sure a block with the last block header has been output. 264 | // Not sure this can actually happen, but we make sure to finish properly 265 | // if it somehow does. 266 | // An empty fixed block is the shortest. 267 | let es = &mut deflate_state.encoder_state; 268 | es.set_huffman_to_fixed(); 269 | es.write_start_of_block(true, true); 270 | es.write_end_of_block(); 271 | } 272 | break; 273 | } 274 | } 275 | 276 | // If we reach this point, the remaining data in the buffers is to be flushed. 277 | deflate_state.encoder_state.flush(); 278 | // Make sure we've output everything, and return the number of bytes written if everything 279 | // went well. 280 | let output_buf_pos = deflate_state.output_buf_pos; 281 | let written_to_writer = deflate_state 282 | .inner 283 | .as_mut() 284 | .expect("Missing writer!") 285 | .write(&deflate_state.encoder_state.inner_vec()[output_buf_pos..])?; 286 | if written_to_writer 287 | < deflate_state 288 | .output_buf() 289 | .len() 290 | .checked_sub(output_buf_pos) 291 | .unwrap() 292 | { 293 | deflate_state.output_buf_pos += written_to_writer; 294 | } else { 295 | // If we sucessfully wrote all the data, we can clear the output buffer. 296 | deflate_state.output_buf_pos = 0; 297 | deflate_state.output_buf().clear(); 298 | deflate_state.needs_flush = false; 299 | } 300 | 301 | Ok(bytes_written) 302 | } 303 | 304 | #[cfg(test)] 305 | mod test { 306 | use super::*; 307 | use crate::test_utils::{decompress_to_end, get_test_data}; 308 | 309 | #[test] 310 | /// Test compressing a short string using fixed encoding. 
311 | fn fixed_string_mem() { 312 | let test_data = String::from(" GNU GENERAL PUBLIC LICENSE").into_bytes(); 313 | let compressed = compress_data_fixed(&test_data); 314 | 315 | let result = decompress_to_end(&compressed); 316 | 317 | assert_eq!(test_data, result); 318 | } 319 | 320 | #[test] 321 | fn fixed_data() { 322 | let data = vec![190u8; 400]; 323 | let compressed = compress_data_fixed(&data); 324 | let result = decompress_to_end(&compressed); 325 | 326 | assert_eq!(data, result); 327 | } 328 | 329 | /// Test deflate example. 330 | /// 331 | /// Check if the encoder produces the same code as the example given by Mark Adler here: 332 | /// https://stackoverflow.com/questions/17398931/deflate-encoding-with-static-huffman-codes/17415203 333 | #[test] 334 | fn fixed_example() { 335 | let test_data = b"Deflate late"; 336 | // let check = 337 | // [0x73, 0x49, 0x4d, 0xcb, 0x49, 0x2c, 0x49, 0x55, 0xc8, 0x49, 0x2c, 0x49, 0x5, 0x0]; 338 | let check = [ 339 | 0x73, 0x49, 0x4d, 0xcb, 0x49, 0x2c, 0x49, 0x55, 0x00, 0x11, 0x00, 340 | ]; 341 | let compressed = compress_data_fixed(test_data); 342 | assert_eq!(&compressed, &check); 343 | let decompressed = decompress_to_end(&compressed); 344 | assert_eq!(&decompressed, test_data) 345 | } 346 | 347 | #[test] 348 | /// Test compression from a file. 349 | fn fixed_string_file() { 350 | let input = get_test_data(); 351 | 352 | let compressed = compress_data_fixed(&input); 353 | println!("Fixed codes compressed len: {}", compressed.len()); 354 | let result = decompress_to_end(&compressed); 355 | 356 | assert_eq!(input.len(), result.len()); 357 | // Not using assert_eq here deliberately to avoid massive amounts of output spam. 358 | assert!(input == result); 359 | } 360 | } 361 | -------------------------------------------------------------------------------- /src/compression_options.rs: -------------------------------------------------------------------------------- 1 | //! 
This module contains the various options to tweak how compression is performed. 2 | //! 3 | //! Note that due to the nature of the `DEFLATE` format, lower compression levels 4 | //! may for some data compress better than higher compression levels. 5 | //! 6 | //! For applications where a maximum level of compression (irrespective of compression 7 | //! speed) is required, consider using the [`Zopfli`](https://crates.io/crates/zopfli) 8 | //! compressor, which uses a specialised (but slow) algorithm to figure out the maximum 9 | //! of compression for the provided data. 10 | //! 11 | use crate::lz77::MatchingType; 12 | use std::convert::From; 13 | 14 | pub const HIGH_MAX_HASH_CHECKS: u16 = 1768; 15 | pub const HIGH_LAZY_IF_LESS_THAN: u16 = 128; 16 | /// The maximum number of hash checks that make sense as this is the length 17 | /// of the hash chain. 18 | pub const MAX_HASH_CHECKS: u16 = 32 * 1024; 19 | pub const DEFAULT_MAX_HASH_CHECKS: u16 = 128; 20 | pub const DEFAULT_LAZY_IF_LESS_THAN: u16 = 32; 21 | 22 | /// An enum describing the level of compression to be used by the encoder 23 | /// 24 | /// Higher compression ratios will take longer to encode. 25 | /// 26 | /// This is a simplified interface to specify a compression level. 27 | /// 28 | /// [See also `CompressionOptions`](./struct.CompressionOptions.html) which provides for 29 | /// tweaking the settings more finely. 30 | #[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] 31 | pub enum Compression { 32 | /// Fast minimal compression (`CompressionOptions::fast()`). 33 | Fast, 34 | /// Default level (`CompressionOptions::default()`). 35 | Default, 36 | /// Higher compression level (`CompressionOptions::high()`). 37 | /// 38 | /// Best in this context isn't actually the highest possible level 39 | /// the encoder can do, but is meant to emulate the `Best` setting in the `Flate2` 40 | /// library. 
41 | Best, 42 | } 43 | 44 | impl Default for Compression { 45 | fn default() -> Compression { 46 | Compression::Default 47 | } 48 | } 49 | 50 | /// Enum allowing some special options (not implemented yet)! 51 | #[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] 52 | pub enum SpecialOptions { 53 | /// Compress normally. 54 | Normal, 55 | /// Force fixed Huffman tables. (Unimplemented!). 56 | _ForceFixed, 57 | /// Force stored (uncompressed) blocks only. (Unimplemented!). 58 | _ForceStored, 59 | } 60 | 61 | impl Default for SpecialOptions { 62 | fn default() -> SpecialOptions { 63 | SpecialOptions::Normal 64 | } 65 | } 66 | 67 | pub const DEFAULT_OPTIONS: CompressionOptions = CompressionOptions { 68 | max_hash_checks: DEFAULT_MAX_HASH_CHECKS, 69 | lazy_if_less_than: DEFAULT_LAZY_IF_LESS_THAN, 70 | matching_type: MatchingType::Lazy, 71 | special: SpecialOptions::Normal, 72 | }; 73 | 74 | /// A struct describing the options for a compressor or compression function. 75 | /// 76 | /// These values are not stable and still subject to change! 77 | #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] 78 | pub struct CompressionOptions { 79 | /// The maximum number of checks to make in the hash table for matches. 80 | /// 81 | /// Higher numbers mean slower, but better compression. Very high (say `>1024`) values 82 | /// will impact compression speed a lot. The maximum match length is 2^15, so values higher than 83 | /// this won't make any difference, and will be truncated to 2^15 by the compression 84 | /// function/writer. 85 | /// 86 | /// Default value: `128` 87 | pub max_hash_checks: u16, 88 | // pub _window_size: u16, 89 | /// Only lazy match if we have a length less than this value. 90 | /// 91 | /// Higher values degrade compression slightly, but improve compression speed. 92 | /// 93 | /// * `0`: Never lazy match. (Same effect as setting `MatchingType` to greedy, but may be slower). 
94 | /// * `1...257`: Only check for a better match if the first match was shorter than this value. 95 | /// * `258`: Always lazy match. 96 | /// 97 | /// As the maximum length of a match is `258`, values higher than this will have 98 | /// no further effect. 99 | /// 100 | /// * Default value: `32` 101 | pub lazy_if_less_than: u16, 102 | 103 | // pub _decent_match: u16, 104 | /// Whether to use lazy or greedy matching. 105 | /// 106 | /// Lazy matching will provide better compression, at the expense of compression speed. 107 | /// 108 | /// As a special case, if max_hash_checks is set to 0, and matching_type is set to lazy, 109 | /// compression using only run-length encoding (i.e maximum match distance of 1) is performed. 110 | /// (This may be changed in the future but is defined like this at the moment to avoid API 111 | /// breakage. 112 | /// 113 | /// [See `MatchingType`](./enum.MatchingType.html) 114 | /// 115 | /// * Default value: `MatchingType::Lazy` 116 | pub matching_type: MatchingType, 117 | /// Force fixed/stored blocks (Not implemented yet). 118 | /// * Default value: `SpecialOptions::Normal` 119 | pub special: SpecialOptions, 120 | } 121 | 122 | // Some standard profiles for the compression options. 123 | // Ord should be implemented at some point, but won't yet until the struct is stabilised. 124 | impl CompressionOptions { 125 | /// Returns compression settings roughly corresponding to the `HIGH(9)` setting in miniz. 126 | pub const fn high() -> CompressionOptions { 127 | CompressionOptions { 128 | max_hash_checks: HIGH_MAX_HASH_CHECKS, 129 | lazy_if_less_than: HIGH_LAZY_IF_LESS_THAN, 130 | matching_type: MatchingType::Lazy, 131 | special: SpecialOptions::Normal, 132 | } 133 | } 134 | 135 | /// Returns a fast set of compression settings 136 | /// 137 | /// Ideally this should roughly correspond to the `FAST(1)` setting in miniz. 
138 | /// However, that setting makes miniz use a somewhat different algorithm, 139 | /// so currently hte fast level in this library is slower and better compressing 140 | /// than the corresponding level in miniz. 141 | pub const fn fast() -> CompressionOptions { 142 | CompressionOptions { 143 | max_hash_checks: 1, 144 | lazy_if_less_than: 0, 145 | matching_type: MatchingType::Greedy, 146 | special: SpecialOptions::Normal, 147 | } 148 | } 149 | 150 | /// Returns a set of compression settings that makes the compressor only compress using 151 | /// Huffman coding. (Ignoring any length/distance matching) 152 | /// 153 | /// This will normally have the worst compression ratio (besides only using uncompressed data), 154 | /// but may be the fastest method in some cases. 155 | pub const fn huffman_only() -> CompressionOptions { 156 | CompressionOptions { 157 | max_hash_checks: 0, 158 | lazy_if_less_than: 0, 159 | matching_type: MatchingType::Greedy, 160 | special: SpecialOptions::Normal, 161 | } 162 | } 163 | 164 | /// Returns a set of compression settings that makes the compressor compress only using 165 | /// run-length encoding (i.e only looking for matches one byte back). 166 | /// 167 | /// This is very fast, but tends to compress worse than looking for more matches using hash 168 | /// chains that the slower settings do. 169 | /// Works best on data that has runs of equivalent bytes, like binary or simple images, 170 | /// less good for text. 171 | pub const fn rle() -> CompressionOptions { 172 | CompressionOptions { 173 | max_hash_checks: 0, 174 | lazy_if_less_than: 0, 175 | matching_type: MatchingType::Lazy, 176 | special: SpecialOptions::Normal, 177 | } 178 | } 179 | } 180 | 181 | impl Default for CompressionOptions { 182 | /// Returns the options describing the default compression level. 
183 | fn default() -> CompressionOptions { 184 | DEFAULT_OPTIONS 185 | } 186 | } 187 | 188 | impl From for CompressionOptions { 189 | fn from(compression: Compression) -> CompressionOptions { 190 | match compression { 191 | Compression::Fast => CompressionOptions::fast(), 192 | Compression::Default => CompressionOptions::default(), 193 | Compression::Best => CompressionOptions::high(), 194 | } 195 | } 196 | } 197 | -------------------------------------------------------------------------------- /src/deflate_state.rs: -------------------------------------------------------------------------------- 1 | use std::io::Write; 2 | use std::{cmp, io, mem}; 3 | 4 | use crate::compress::Flush; 5 | use crate::compression_options::{CompressionOptions, MAX_HASH_CHECKS}; 6 | use crate::encoder_state::EncoderState; 7 | pub use crate::huffman_table::MAX_MATCH; 8 | use crate::huffman_table::NUM_LITERALS_AND_LENGTHS; 9 | use crate::input_buffer::InputBuffer; 10 | use crate::length_encode::{EncodedLength, LeafVec}; 11 | use crate::lz77::LZ77State; 12 | use crate::output_writer::DynamicWriter; 13 | 14 | /// A counter used for checking values in debug mode. 15 | /// Does nothing when debug assertions are disabled. 
16 | #[derive(Default)] 17 | pub struct DebugCounter { 18 | #[cfg(debug_assertions)] 19 | count: u64, 20 | } 21 | 22 | impl DebugCounter { 23 | #[cfg(debug_assertions)] 24 | pub const fn get(&self) -> u64 { 25 | self.count 26 | } 27 | 28 | #[cfg(not(debug_assertions))] 29 | pub const fn get(&self) -> u64 { 30 | 0 31 | } 32 | 33 | #[cfg(debug_assertions)] 34 | pub fn reset(&mut self) { 35 | self.count = 0; 36 | } 37 | 38 | #[cfg(not(debug_assertions))] 39 | pub fn reset(&self) {} 40 | 41 | #[cfg(debug_assertions)] 42 | pub fn add(&mut self, val: u64) { 43 | self.count += val; 44 | } 45 | 46 | #[cfg(not(debug_assertions))] 47 | pub fn add(&self, _: u64) {} 48 | } 49 | 50 | pub struct LengthBuffers { 51 | pub leaf_buf: LeafVec, 52 | pub length_buf: Vec, 53 | } 54 | 55 | impl LengthBuffers { 56 | #[inline] 57 | fn new() -> LengthBuffers { 58 | LengthBuffers { 59 | leaf_buf: Vec::with_capacity(NUM_LITERALS_AND_LENGTHS), 60 | length_buf: Vec::with_capacity(19), 61 | } 62 | } 63 | } 64 | 65 | /// A struct containing all the stored state used for the encoder. 66 | pub struct DeflateState { 67 | /// State of lz77 compression. 68 | pub lz77_state: LZ77State, 69 | pub input_buffer: InputBuffer, 70 | pub compression_options: CompressionOptions, 71 | /// State the Huffman part of the compression and the output buffer. 72 | pub encoder_state: EncoderState, 73 | /// The buffer containing the raw output of the lz77-encoding. 74 | pub lz77_writer: DynamicWriter, 75 | /// Buffers used when generating Huffman code lengths. 76 | pub length_buffers: LengthBuffers, 77 | /// Total number of bytes consumed/written to the input buffer. 78 | pub bytes_written: u64, 79 | /// Wrapped writer. 80 | /// Option is used to allow us to implement `Drop` and `finish()` at the same time for the 81 | /// writer structs. 
82 | pub inner: Option, 83 | /// The position in the output buffer where data should be flushed from, to keep track of 84 | /// what data has been output in case not all data is output when writing to the wrapped 85 | /// writer. 86 | pub output_buf_pos: usize, 87 | pub flush_mode: Flush, 88 | /// Whether we need to flush everything before continuing. 89 | /// Currently only used after having output a sync flush. 90 | /// This is implemented in a somewhat clunky manner at the moment, 91 | /// ideally it should be done in a more fail-safe way to avoid 92 | /// further bugs. 93 | pub needs_flush: bool, 94 | /// Number of bytes written as calculated by sum of block input lengths. 95 | /// Used to check that they are correct when `debug_assertions` are enabled. 96 | pub bytes_written_control: DebugCounter, 97 | } 98 | 99 | impl DeflateState { 100 | pub fn new(compression_options: CompressionOptions, writer: W) -> DeflateState { 101 | DeflateState { 102 | input_buffer: InputBuffer::empty(), 103 | lz77_state: LZ77State::new( 104 | compression_options.max_hash_checks, 105 | cmp::min(compression_options.lazy_if_less_than, MAX_HASH_CHECKS), 106 | compression_options.matching_type, 107 | ), 108 | encoder_state: EncoderState::new(Vec::with_capacity(1024 * 32)), 109 | lz77_writer: DynamicWriter::new(), 110 | length_buffers: LengthBuffers::new(), 111 | compression_options, 112 | bytes_written: 0, 113 | inner: Some(writer), 114 | output_buf_pos: 0, 115 | flush_mode: Flush::None, 116 | needs_flush: false, 117 | bytes_written_control: DebugCounter::default(), 118 | } 119 | } 120 | 121 | #[inline] 122 | pub fn output_buf(&mut self) -> &mut Vec { 123 | self.encoder_state.inner_vec() 124 | } 125 | 126 | /// Resets the status of the decoder, leaving the compression options intact 127 | /// 128 | /// If flushing the current writer succeeds, it is replaced with the provided one, 129 | /// buffers and status (except compression options) is reset and the old writer 130 | /// is returned. 
131 | /// 132 | /// If flushing fails, the rest of the writer is not cleared. 133 | pub fn reset(&mut self, writer: W) -> io::Result { 134 | self.encoder_state.flush(); 135 | self.inner 136 | .as_mut() 137 | .expect("Missing writer!") 138 | .write_all(self.encoder_state.inner_vec())?; 139 | self.encoder_state.inner_vec().clear(); 140 | self.input_buffer = InputBuffer::empty(); 141 | self.lz77_writer.clear(); 142 | self.lz77_state.reset(); 143 | self.bytes_written = 0; 144 | self.output_buf_pos = 0; 145 | self.flush_mode = Flush::None; 146 | self.needs_flush = false; 147 | if cfg!(debug_assertions) { 148 | self.bytes_written_control.reset(); 149 | } 150 | mem::replace(&mut self.inner, Some(writer)) 151 | .ok_or_else(|| io::Error::new(io::ErrorKind::Other, "Missing writer")) 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /src/encoder_state.rs: -------------------------------------------------------------------------------- 1 | use crate::bitstream::LsbWriter; 2 | use crate::huffman_table::HuffmanTable; 3 | use crate::lzvalue::LZType; 4 | #[cfg(test)] 5 | use std::mem; 6 | 7 | // The first bits of each block, which describe the type of the block 8 | // `-TTF` - TT = type, 00 = stored, 01 = fixed, 10 = dynamic, 11 = reserved, F - 1 if final block 9 | // `0000`; 10 | const FIXED_FIRST_BYTE: u16 = 0b010; 11 | const FIXED_FIRST_BYTE_FINAL: u16 = 0b011; 12 | const DYNAMIC_FIRST_BYTE: u16 = 0b100; 13 | const DYNAMIC_FIRST_BYTE_FINAL: u16 = 0b101; 14 | 15 | #[allow(dead_code)] 16 | pub enum BType { 17 | NoCompression = 0b00, 18 | FixedHuffman = 0b01, 19 | DynamicHuffman = 0b10, // Reserved = 0b11, //Error 20 | } 21 | 22 | /// A struct wrapping a writer that writes data compressed using the provided Huffman table 23 | pub struct EncoderState { 24 | pub huffman_table: HuffmanTable, 25 | pub writer: LsbWriter, 26 | } 27 | 28 | impl EncoderState { 29 | /// Creates a new encoder state using the provided Huffman table and 
writer 30 | pub fn new(writer: Vec) -> EncoderState { 31 | EncoderState { 32 | huffman_table: HuffmanTable::empty(), 33 | writer: LsbWriter::new(writer), 34 | } 35 | } 36 | 37 | #[cfg(test)] 38 | /// Creates a new encoder state using the fixed Huffman table 39 | pub fn fixed(writer: Vec) -> EncoderState { 40 | EncoderState { 41 | huffman_table: HuffmanTable::fixed_table(), 42 | writer: LsbWriter::new(writer), 43 | } 44 | } 45 | 46 | pub fn inner_vec(&mut self) -> &mut Vec { 47 | &mut self.writer.w 48 | } 49 | 50 | /// Encodes a literal value to the writer 51 | fn write_literal(&mut self, value: u8) { 52 | let code = self.huffman_table.get_literal(value); 53 | debug_assert!(code.length > 0); 54 | self.writer.write_bits(code.code, code.length); 55 | } 56 | 57 | /// Write a LZvalue to the contained writer, returning Err if the write operation fails 58 | pub fn write_lzvalue(&mut self, value: LZType) { 59 | match value { 60 | LZType::Literal(l) => self.write_literal(l), 61 | LZType::StoredLengthDistance(l, d) => { 62 | let (code, extra_bits_code) = self.huffman_table.get_length_huffman(l); 63 | debug_assert!( 64 | code.length != 0, 65 | "Code: {:?}, Value: {:?}", code, value 66 | ); 67 | self.writer.write_bits(code.code, code.length); 68 | self.writer 69 | .write_bits(extra_bits_code.code, extra_bits_code.length); 70 | 71 | let (code, extra_bits_code) = self.huffman_table.get_distance_huffman(d); 72 | debug_assert!( 73 | code.length != 0, 74 | "Code: {:?}, Value: {:?}", code, value 75 | ); 76 | 77 | self.writer.write_bits(code.code, code.length); 78 | self.writer 79 | .write_bits(extra_bits_code.code, extra_bits_code.length) 80 | } 81 | }; 82 | } 83 | 84 | /// Write the start of a block, returning Err if the write operation fails. 
85 | pub fn write_start_of_block(&mut self, fixed: bool, final_block: bool) { 86 | if final_block { 87 | // The final block has one bit flipped to indicate it's 88 | // the final one 89 | if fixed { 90 | self.writer.write_bits(FIXED_FIRST_BYTE_FINAL, 3) 91 | } else { 92 | self.writer.write_bits(DYNAMIC_FIRST_BYTE_FINAL, 3) 93 | } 94 | } else if fixed { 95 | self.writer.write_bits(FIXED_FIRST_BYTE, 3) 96 | } else { 97 | self.writer.write_bits(DYNAMIC_FIRST_BYTE, 3) 98 | } 99 | } 100 | 101 | /// Write the end of block code 102 | pub fn write_end_of_block(&mut self) { 103 | let code = self.huffman_table.get_end_of_block(); 104 | self.writer.write_bits(code.code, code.length) 105 | } 106 | 107 | /// Flush the contained writer and it's bitstream wrapper. 108 | pub fn flush(&mut self) { 109 | self.writer.flush_raw() 110 | } 111 | 112 | pub fn set_huffman_to_fixed(&mut self) { 113 | self.huffman_table.set_to_fixed() 114 | } 115 | 116 | /// Reset the encoder state with a new writer, returning the old one if flushing 117 | /// succeeds. 118 | #[cfg(test)] 119 | pub fn reset(&mut self, writer: Vec) -> Vec { 120 | // Make sure the writer is flushed 121 | // Ideally this should be done before this function is called, but we 122 | // do it here just in case. 123 | self.flush(); 124 | // Reset the huffman table 125 | // This probably isn't needed, but again, we do it just in case to avoid leaking any data 126 | // If this turns out to be a performance issue, it can probably be ignored later. 
127 | self.huffman_table = HuffmanTable::empty(); 128 | mem::replace(&mut self.writer.w, writer) 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /src/huffman_lengths.rs: -------------------------------------------------------------------------------- 1 | use crate::bitstream::LsbWriter; 2 | use crate::deflate_state::LengthBuffers; 3 | use crate::huffman_table::{ 4 | create_codes_in_place, num_extra_bits_for_distance_code, num_extra_bits_for_length_code, 5 | HuffmanTable, FIXED_CODE_LENGTHS, LENGTH_BITS_START, MAX_CODE_LENGTH, NUM_DISTANCE_CODES, 6 | NUM_LITERALS_AND_LENGTHS, 7 | }; 8 | use crate::length_encode::{ 9 | encode_lengths_m, huffman_lengths_from_frequency_m, EncodedLength, COPY_PREVIOUS, 10 | REPEAT_ZERO_3_BITS, REPEAT_ZERO_7_BITS, 11 | }; 12 | use crate::output_writer::FrequencyType; 13 | use crate::stored_block::MAX_STORED_BLOCK_LENGTH; 14 | 15 | use std::cmp; 16 | 17 | /// The minimum number of literal/length values 18 | pub const MIN_NUM_LITERALS_AND_LENGTHS: usize = 257; 19 | /// The minimum number of distances 20 | pub const MIN_NUM_DISTANCES: usize = 1; 21 | 22 | const NUM_HUFFMAN_LENGTHS: usize = 19; 23 | 24 | /// The output ordering of the lengths for the Huffman codes used to encode the lengths 25 | /// used to build the full Huffman tree for length/literal codes. 26 | /// http://www.gzip.org/zlib/rfc-deflate.html#dyn 27 | const HUFFMAN_LENGTH_ORDER: [u8; NUM_HUFFMAN_LENGTHS] = [ 28 | 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15, 29 | ]; 30 | 31 | // Number of bits used for the values specifying the number of codes 32 | const HLIT_BITS: u8 = 5; 33 | const HDIST_BITS: u8 = 5; 34 | const HCLEN_BITS: u8 = 4; 35 | 36 | /// The longest a Huffman code describing another Huffman length can be 37 | const MAX_HUFFMAN_CODE_LENGTH: usize = 7; 38 | 39 | // How many bytes (not including padding and the 3-bit block type) the stored block header takes up. 
40 | const STORED_BLOCK_HEADER_LENGTH: u64 = 4; 41 | const BLOCK_MARKER_LENGTH: u8 = 3; 42 | 43 | /// Creates a new slice from the input slice that stops at the final non-zero value 44 | pub fn remove_trailing_zeroes + PartialEq>(input: &[T], min_length: usize) -> &[T] { 45 | let num_zeroes = input.iter().rev().take_while(|&a| *a == T::from(0)).count(); 46 | &input[0..cmp::max(input.len() - num_zeroes, min_length)] 47 | } 48 | 49 | /// How many extra bits the Huffman length code uses to represent a value. 50 | fn extra_bits_for_huffman_length_code(code: u8) -> u8 { 51 | match code { 52 | 16..=17 => 3, 53 | 18 => 7, 54 | _ => 0, 55 | } 56 | } 57 | 58 | /// Calculate how many bits the Huffman-encoded Huffman lengths will use. 59 | fn calculate_huffman_length(frequencies: &[FrequencyType], code_lengths: &[u8]) -> u64 { 60 | frequencies 61 | .iter() 62 | .zip(code_lengths) 63 | .enumerate() 64 | .fold(0, |acc, (n, (&f, &l))| { 65 | acc + (u64::from(f) 66 | * (u64::from(l) + u64::from(extra_bits_for_huffman_length_code(n as u8)))) 67 | }) 68 | } 69 | 70 | /// Calculate how many bits data with the given frequencies will use when compressed with dynamic 71 | /// code lengths (first return value) and static code lengths (second return value). 72 | /// 73 | /// Parameters: 74 | /// Frequencies, length of dynamic codes, and a function to get how many extra bits in addition 75 | /// to the length of the Huffman code the symbol will use. 76 | fn calculate_block_length( 77 | frequencies: &[FrequencyType], 78 | dyn_code_lengths: &[u8], 79 | get_num_extra_bits: &F, 80 | ) -> (u64, u64) 81 | where 82 | F: Fn(usize) -> u64, 83 | { 84 | // Length of data represented by dynamic codes. 85 | let mut d_ll_length = 0u64; 86 | // length of data represented by static codes. 
87 | let mut s_ll_length = 0u64; 88 | 89 | let iter = frequencies 90 | .iter() 91 | .zip(dyn_code_lengths.iter().zip(FIXED_CODE_LENGTHS.iter())) 92 | .enumerate(); 93 | 94 | // This could maybe be optimised a bit by splitting the iteration of codes using extra bits and 95 | // codes not using extra bits, but the extra complexity may not be worth it. 96 | for (c, (&f, (&l, &fl))) in iter { 97 | // Frequency 98 | let f = u64::from(f); 99 | // How many extra bits the current code number needs. 100 | let extra_bits_for_code = get_num_extra_bits(c); 101 | 102 | d_ll_length += f * (u64::from(l) + extra_bits_for_code); 103 | s_ll_length += f * (u64::from(fl) + extra_bits_for_code); 104 | } 105 | 106 | (d_ll_length, s_ll_length) 107 | } 108 | 109 | /// Get how extra padding bits after a block start header a stored block would use. 110 | /// 111 | /// # Panics 112 | /// Panics if `pending_bits > 8` 113 | fn stored_padding(pending_bits: u8) -> u64 { 114 | assert!(pending_bits <= 8); 115 | let free_space = 8 - pending_bits; 116 | if free_space >= BLOCK_MARKER_LENGTH { 117 | // There is space in the current byte for the header. 118 | free_space - BLOCK_MARKER_LENGTH 119 | } else { 120 | // The header will require an extra byte. 121 | 8 - (BLOCK_MARKER_LENGTH - free_space) 122 | } 123 | .into() 124 | } 125 | 126 | /// Calculate the number of bits storing the data in stored blocks will take up, excluding the 127 | /// first block start code and potential padding bits. As stored blocks have a maximum length, 128 | /// (as opposed to fixed and dynamic ones), multiple blocks may have to be utilised. 129 | /// 130 | /// # Panics 131 | /// Panics if `input_bytes` is 0. 132 | fn stored_length(input_bytes: u64) -> u64 { 133 | // Check how many stored blocks these bytes would take up. 134 | // (Integer divison rounding up.) 
// NOTE(review): continuation of `stored_length`; formatting restored from the dump.
    let num_blocks = (input_bytes
        .checked_sub(1)
        .expect("Underflow calculating stored block length!")
        / MAX_STORED_BLOCK_LENGTH as u64)
        + 1;
    // The length will be the input length and the headers for each block. (Excluding the start
    // of block code for the first one)
    (input_bytes + (STORED_BLOCK_HEADER_LENGTH as u64 * num_blocks) + (num_blocks - 1)) * 8
}

pub enum BlockType {
    Stored,
    Fixed,
    Dynamic(DynamicBlockHeader),
}

/// A struct containing the different data needed to write the header for a dynamic block.
///
/// The code lengths are stored directly in the `HuffmanTable` struct.
/// TODO: Do the same for other things here.
pub struct DynamicBlockHeader {
    /// Length of the run-length encoding symbols.
    // NOTE(review): the scrape had stripped the element type; restored to `Vec<u8>`
    // (filled from `huffman_lengths_from_frequency_m`, which produces u8 code lengths).
    pub huffman_table_lengths: Vec<u8>,
    /// Number of lengths for values describing the Huffman table that encodes the length values
    /// of the main Huffman tables.
    pub used_hclens: usize,
}

/// Generate the lengths of the Huffman codes we will be using, using the
/// frequency of the different symbols/lengths/distances, and determine what block type will give
/// the shortest representation.
/// TODO: This needs a test
pub fn gen_huffman_lengths(
    l_freqs: &[FrequencyType],
    d_freqs: &[FrequencyType],
    num_input_bytes: u64,
    pending_bits: u8,
    l_lengths: &mut [u8; 288],
    d_lengths: &mut [u8; 32],
    length_buffers: &mut LengthBuffers,
) -> BlockType {
    // Avoid corner cases and issues if this is called for an empty block.
    // For blocks this short, a fixed block will be the shortest.
    // TODO: Find the minimum value it's worth doing calculations for.
    if num_input_bytes <= 4 {
        return BlockType::Fixed;
    };

    let l_freqs = remove_trailing_zeroes(l_freqs, MIN_NUM_LITERALS_AND_LENGTHS);
    let d_freqs = remove_trailing_zeroes(d_freqs, MIN_NUM_DISTANCES);

    // The huffman spec allows us to exclude zeroes at the end of the
    // table of huffman lengths.
    // Since a frequency of 0 will give an huffman
    // length of 0. We strip off the trailing zeroes before even
    // generating the lengths to save some work.
    // There is however a minimum number of values we have to keep
    // according to the deflate spec.
    // TODO: We could probably compute some of this in parallel.
    huffman_lengths_from_frequency_m(
        l_freqs,
        MAX_CODE_LENGTH,
        &mut length_buffers.leaf_buf,
        l_lengths,
    );
    huffman_lengths_from_frequency_m(
        d_freqs,
        MAX_CODE_LENGTH,
        &mut length_buffers.leaf_buf,
        d_lengths,
    );

    let used_lengths = l_freqs.len();
    let used_distances = d_freqs.len();

    // Encode length values
    let mut freqs = [0u16; 19];
    encode_lengths_m(
        l_lengths[..used_lengths]
            .iter()
            .chain(&d_lengths[..used_distances]),
        &mut length_buffers.length_buf,
        &mut freqs,
    );

    // Create huffman lengths for the length/distance code lengths
    let mut huffman_table_lengths = vec![0; freqs.len()];
    huffman_lengths_from_frequency_m(
        &freqs,
        MAX_HUFFMAN_CODE_LENGTH,
        &mut length_buffers.leaf_buf,
        huffman_table_lengths.as_mut_slice(),
    );

    // Count how many of these lengths we use.
    let used_hclens = HUFFMAN_LENGTH_ORDER.len()
        - HUFFMAN_LENGTH_ORDER
            .iter()
            .rev()
            .take_while(|&&n| huffman_table_lengths[n as usize] == 0)
            .count();

    // There has to be at least 4 hclens, so if there isn't, something went wrong.
    debug_assert!(used_hclens >= 4);

    // Calculate how many bytes of space this block will take up with the different block types
    // (excluding the 3-bit block header since it's used in all block types).

    // Total length of the compressed literals/lengths.
    let (d_ll_length, s_ll_length) = calculate_block_length(l_freqs, l_lengths, &|c| {
        num_extra_bits_for_length_code(c.saturating_sub(LENGTH_BITS_START as usize) as u8).into()
    });

    // Total length of the compressed distances.
    let (d_dist_length, s_dist_length) = calculate_block_length(d_freqs, d_lengths, &|c| {
        num_extra_bits_for_distance_code(c as u8).into()
    });

    // Total length of the compressed huffman code lengths.
    let huff_table_length = calculate_huffman_length(&freqs, &huffman_table_lengths);

    // For dynamic blocks the huffman tables takes up some extra space.
    let dynamic_length = d_ll_length
        + d_dist_length
        + huff_table_length
        + (used_hclens as u64 * 3)
        + u64::from(HLIT_BITS)
        + u64::from(HDIST_BITS)
        + u64::from(HCLEN_BITS);

    // Static blocks don't have any extra header data.
    let static_length = s_ll_length + s_dist_length;

    // Calculate how many bits it will take to store the data in uncompressed (stored) block(s).
    let stored_length = stored_length(num_input_bytes) + stored_padding(pending_bits % 8);

    let used_length = cmp::min(cmp::min(dynamic_length, static_length), stored_length);

    // Check if the block is actually compressed. If using a dynamic block
    // increases the length of the block (for instance if the input data is mostly random or
    // already compressed), we want to output a stored(uncompressed) block instead to avoid wasting
    // space.
    if used_length == static_length {
        BlockType::Fixed
    } else if used_length == stored_length {
        BlockType::Stored
    } else {
        BlockType::Dynamic(DynamicBlockHeader {
            huffman_table_lengths,
            used_hclens,
        })
    }
}

/// Write the specified Huffman lengths to the bit writer
pub fn write_huffman_lengths(
    header: &DynamicBlockHeader,
    huffman_table: &HuffmanTable,
    encoded_lengths: &[EncodedLength],
    writer: &mut LsbWriter,
) {
    // Ignore trailing zero lengths as allowed by the deflate spec.
    let (literal_len_lengths, distance_lengths) = huffman_table.get_lengths();
    let literal_len_lengths =
        remove_trailing_zeroes(literal_len_lengths, MIN_NUM_LITERALS_AND_LENGTHS);
    let distance_lengths = remove_trailing_zeroes(distance_lengths, MIN_NUM_DISTANCES);
    let huffman_table_lengths = &header.huffman_table_lengths;
    let used_hclens = header.used_hclens;

    assert!(literal_len_lengths.len() <= NUM_LITERALS_AND_LENGTHS);
    assert!(literal_len_lengths.len() >= MIN_NUM_LITERALS_AND_LENGTHS);
    assert!(distance_lengths.len() <= NUM_DISTANCE_CODES);
    assert!(distance_lengths.len() >= MIN_NUM_DISTANCES);

    // Number of length codes - 257.
    let hlit = (literal_len_lengths.len() - MIN_NUM_LITERALS_AND_LENGTHS) as u16;
    writer.write_bits(hlit, HLIT_BITS);
    // Number of distance codes - 1.
    let hdist = (distance_lengths.len() - MIN_NUM_DISTANCES) as u16;
    writer.write_bits(hdist, HDIST_BITS);

    // Number of huffman table lengths - 4.
    let hclen = used_hclens.saturating_sub(4);

    // Write HCLEN.
    // Casting to u16 is safe since the length can never be more than the length of
    // `HUFFMAN_LENGTH_ORDER` anyhow.
322 | writer.write_bits(hclen as u16, HCLEN_BITS); 323 | 324 | // Write the lengths for the huffman table describing the huffman table 325 | // Each length is 3 bits 326 | for n in &HUFFMAN_LENGTH_ORDER[..used_hclens] { 327 | writer.write_bits(u16::from(huffman_table_lengths[usize::from(*n)]), 3); 328 | } 329 | 330 | // Generate codes for the main huffman table using the lengths we just wrote 331 | let mut codes = [0u16; NUM_HUFFMAN_LENGTHS]; 332 | create_codes_in_place(&mut codes[..], huffman_table_lengths); 333 | 334 | // Write the actual huffman lengths 335 | for v in encoded_lengths { 336 | match *v { 337 | EncodedLength::Length(n) => { 338 | let (c, l) = (codes[usize::from(n)], huffman_table_lengths[usize::from(n)]); 339 | writer.write_bits(c, l); 340 | } 341 | EncodedLength::CopyPrevious(n) => { 342 | let (c, l) = (codes[COPY_PREVIOUS], huffman_table_lengths[COPY_PREVIOUS]); 343 | writer.write_bits(c, l); 344 | debug_assert!(n >= 3); 345 | debug_assert!(n <= 6); 346 | writer.write_bits((n - 3).into(), 2); 347 | } 348 | EncodedLength::RepeatZero3Bits(n) => { 349 | let (c, l) = ( 350 | codes[REPEAT_ZERO_3_BITS], 351 | huffman_table_lengths[REPEAT_ZERO_3_BITS], 352 | ); 353 | writer.write_bits(c, l); 354 | debug_assert!(n >= 3); 355 | writer.write_bits((n - 3).into(), 3); 356 | } 357 | EncodedLength::RepeatZero7Bits(n) => { 358 | let (c, l) = ( 359 | codes[REPEAT_ZERO_7_BITS], 360 | huffman_table_lengths[REPEAT_ZERO_7_BITS], 361 | ); 362 | writer.write_bits(c, l); 363 | debug_assert!(n >= 11); 364 | debug_assert!(n <= 138); 365 | writer.write_bits((n - 11).into(), 7); 366 | } 367 | } 368 | } 369 | } 370 | 371 | #[cfg(test)] 372 | mod test { 373 | use super::stored_padding; 374 | #[test] 375 | fn padding() { 376 | assert_eq!(stored_padding(0), 5); 377 | assert_eq!(stored_padding(1), 4); 378 | assert_eq!(stored_padding(2), 3); 379 | assert_eq!(stored_padding(3), 2); 380 | assert_eq!(stored_padding(4), 1); 381 | assert_eq!(stored_padding(5), 0); 382 | 
assert_eq!(stored_padding(6), 7); 383 | assert_eq!(stored_padding(7), 6); 384 | } 385 | } 386 | -------------------------------------------------------------------------------- /src/huffman_table.rs: -------------------------------------------------------------------------------- 1 | use crate::bit_reverse::reverse_bits; 2 | use crate::lzvalue::StoredLength; 3 | use std::fmt; 4 | 5 | /// The number of length codes in the Huffman table 6 | pub const NUM_LENGTH_CODES: usize = 29; 7 | 8 | /// The number of distance codes in the distance Huffman table 9 | // NOTE: two mode codes are actually used when constructing codes 10 | pub const NUM_DISTANCE_CODES: usize = 30; 11 | 12 | /// Combined number of literal and length codes 13 | // NOTE: two mode codes are actually used when constructing codes 14 | pub const NUM_LITERALS_AND_LENGTHS: usize = 286; 15 | 16 | /// The maximum length of a Huffman code 17 | pub const MAX_CODE_LENGTH: usize = 15; 18 | 19 | /// The minimum and maximum lengths for a match according to the DEFLATE specification 20 | pub const MIN_MATCH: u16 = 3; 21 | pub const MAX_MATCH: u16 = 258; 22 | 23 | #[cfg(test)] 24 | pub const MIN_DISTANCE: u16 = 1; 25 | pub const MAX_DISTANCE: u16 = 32768; 26 | 27 | /// The position in the literal/length table of the end of block symbol 28 | pub const END_OF_BLOCK_POSITION: usize = 256; 29 | 30 | /// Bit lengths for literal and length codes in the fixed Huffman table 31 | /// The Huffman codes are generated from this and the distance bit length table 32 | pub static FIXED_CODE_LENGTHS: [u8; NUM_LITERALS_AND_LENGTHS + 2] = [ 33 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 34 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 35 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 36 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 
37 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 38 | 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 39 | 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 40 | 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 41 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 42 | ]; 43 | 44 | /// The number of extra bits for the length codes 45 | const LENGTH_EXTRA_BITS_LENGTH: [u8; NUM_LENGTH_CODES] = [ 46 | 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 47 | ]; 48 | 49 | /// Table used to get a code from a length value (see get_distance_code_and_extra_bits) 50 | const LENGTH_CODE: [u8; 256] = [ 51 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 52 | 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 53 | 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 54 | 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 55 | 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 56 | 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 57 | 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 58 | 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 59 | 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 60 | 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 61 | 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28, 62 | ]; 63 | 64 | /// Base values to calculate the value of 
the bits in length codes 65 | const BASE_LENGTH: [u8; NUM_LENGTH_CODES] = [ 66 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128, 67 | 160, 192, 224, 255, 68 | ]; // 258 - MIN_MATCh 69 | 70 | /// What number in the literal/length table the lengths start at 71 | pub const LENGTH_BITS_START: u16 = 257; 72 | 73 | /// Lengths for the distance codes in the pre-defined/fixed Huffman table 74 | /// (All distance codes are 5 bits long) 75 | pub const FIXED_CODE_LENGTHS_DISTANCE: [u8; NUM_DISTANCE_CODES + 2] = [5; NUM_DISTANCE_CODES + 2]; 76 | 77 | const DISTANCE_CODES: [u8; 512] = [ 78 | 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 79 | 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 80 | 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 81 | 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 82 | 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 83 | 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 84 | 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 85 | 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 86 | 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 87 | 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 88 | 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 16, 17, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 89 | 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 90 | 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 91 | 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 
26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 92 | 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 93 | 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 94 | 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 95 | 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 96 | 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 97 | 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 98 | 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 99 | ]; 100 | 101 | /// Number of extra bits following the distance codes 102 | #[cfg(test)] 103 | const DISTANCE_EXTRA_BITS: [u8; NUM_DISTANCE_CODES] = [ 104 | 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 105 | 13, 106 | ]; 107 | 108 | const DISTANCE_BASE: [u16; NUM_DISTANCE_CODES] = [ 109 | 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536, 110 | 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 111 | ]; 112 | 113 | pub const fn num_extra_bits_for_length_code(code: u8) -> u8 { 114 | LENGTH_EXTRA_BITS_LENGTH[code as usize] 115 | } 116 | 117 | /// Get the number of extra bits used for a distance code. 118 | /// (Code numbers above `NUM_DISTANCE_CODES` will give some garbage 119 | /// value.) 120 | pub fn num_extra_bits_for_distance_code(code: u8) -> u8 { 121 | // This can be easily calculated without a lookup. 122 | // 123 | let mut c = code >> 1; 124 | c -= (c != 0) as u8; 125 | c 126 | } 127 | 128 | /// A struct representing the data needed to generate the bit codes for 129 | /// a given value and Huffman table. 130 | #[derive(Copy, Clone)] 131 | struct ExtraBits { 132 | /// The position of the length in the Huffman table. 
133 | pub code_number: u16, 134 | /// Number of extra bits following the code. 135 | pub num_bits: u8, 136 | /// The value of the extra bits, which together with the length/distance code 137 | /// allow us to calculate the exact length/distance. 138 | pub value: u16, 139 | } 140 | 141 | /// Get the length code that corresponds to the length value 142 | /// Panics if length is out of range. 143 | pub fn get_length_code(length: u16) -> usize { 144 | // Going via an u8 here helps the compiler evade bounds checking. 145 | usize::from(LENGTH_CODE[(length.wrapping_sub(MIN_MATCH)) as u8 as usize]) 146 | + LENGTH_BITS_START as usize 147 | } 148 | 149 | /// Get the code for the Huffman table and the extra bits for the requested length. 150 | fn get_length_code_and_extra_bits(length: StoredLength) -> ExtraBits { 151 | // Length values are stored as unsigned bytes, where the actual length is the value - 3 152 | // The `StoredLength` struct takes care of this conversion for us. 153 | let n = LENGTH_CODE[length.stored_length() as usize]; 154 | 155 | // We can then get the base length from the base length table, 156 | // which we use to calculate the value of the extra bits. 157 | let base = BASE_LENGTH[n as usize]; 158 | let num_bits = num_extra_bits_for_length_code(n); 159 | ExtraBits { 160 | code_number: n as u16 + LENGTH_BITS_START, 161 | num_bits, 162 | value: (length.stored_length() - base) as u16, 163 | } 164 | } 165 | 166 | /// Get the spot in the Huffman table for distances `distance` corresponds to 167 | /// Returns 0 if the distance is invalid. 168 | /// Avoiding option here for simplicity and performance, as this being called with an invalid 169 | /// value would be a bug. 170 | pub fn get_distance_code(distance: u16) -> u8 { 171 | let distance = distance as usize; 172 | 173 | match distance { 174 | // Since the array starts at 0, we need to subtract 1 to get the correct code number.
175 | 1..=256 => DISTANCE_CODES[distance - 1], 176 | // Due to the distribution of the distance codes above 256, we can get away with only 177 | // using the top bits to determine the code, rather than having a 32k long table of 178 | // distance codes. 179 | 257..=32768 => DISTANCE_CODES[256 + ((distance - 1) >> 7)], 180 | _ => 0, 181 | } 182 | } 183 | 184 | fn get_distance_code_and_extra_bits(distance: u16) -> ExtraBits { 185 | let distance_code = get_distance_code(distance); 186 | let extra = num_extra_bits_for_distance_code(distance_code); 187 | // FIXME: We should add 1 to the values in distance_base to avoid having to add one here 188 | let base = DISTANCE_BASE[distance_code as usize] + 1; 189 | ExtraBits { 190 | code_number: distance_code.into(), 191 | num_bits: extra, 192 | value: distance - base, 193 | } 194 | } 195 | 196 | #[derive(Copy, Clone, Default)] 197 | pub struct HuffmanCode { 198 | pub code: u16, 199 | pub length: u8, 200 | } 201 | 202 | impl fmt::Debug for HuffmanCode { 203 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 204 | write!( 205 | f, 206 | "HuffmanCode {{ code: {:b}, length: {}}}", 207 | self.code, self.length 208 | ) 209 | } 210 | } 211 | 212 | impl HuffmanCode { 213 | #[inline] 214 | /// Create a Huffman code value from a code and length. 215 | const fn new(code: u16, length: u8) -> HuffmanCode { 216 | HuffmanCode { code, length } 217 | } 218 | } 219 | 220 | #[cfg(test)] 221 | pub struct LengthAndDistanceBits { 222 | pub length_code: HuffmanCode, 223 | pub length_extra_bits: HuffmanCode, 224 | pub distance_code: HuffmanCode, 225 | pub distance_extra_bits: HuffmanCode, 226 | } 227 | 228 | /// Counts the number of values of each length. 229 | /// Returns a tuple containing the longest length value in the table, its position, 230 | /// and fills in lengths in the `len_counts` slice. 231 | /// Panics if `table` is empty, or if any of the lengths exceed 15.
232 | fn build_length_count_table(table: &[u8], len_counts: &mut [u16; 16]) -> (usize, usize) { 233 | // TODO: Validate the length table properly in debug mode. 234 | let max_length = (*table.iter().max().expect("BUG! Empty lengths!")).into(); 235 | 236 | assert!(max_length <= MAX_CODE_LENGTH); 237 | 238 | let mut max_length_pos = 0; 239 | 240 | for (n, &length) in table.iter().enumerate() { 241 | // TODO: Make sure we don't have more of one length than we can make 242 | // codes for 243 | if length > 0 { 244 | len_counts[usize::from(length)] += 1; 245 | max_length_pos = n; 246 | } 247 | } 248 | (max_length, max_length_pos) 249 | } 250 | 251 | /// Generates a vector of Huffman codes given a table of bit lengths 252 | /// Returns an error if any of the lengths are > 15 253 | pub fn create_codes_in_place(code_table: &mut [u16], length_table: &[u8]) { 254 | let mut len_counts = [0; 16]; 255 | let (max_length, max_length_pos) = build_length_count_table(length_table, &mut len_counts); 256 | let lengths = len_counts; 257 | 258 | let mut code = 0u16; 259 | let mut next_code = Vec::with_capacity(length_table.len()); 260 | next_code.push(code); 261 | 262 | for bits in 1..=max_length { 263 | code = (code + lengths[bits - 1]) << 1; 264 | next_code.push(code); 265 | } 266 | 267 | for n in 0..=max_length_pos { 268 | let length = usize::from(length_table[n]); 269 | if length != 0 { 270 | // The algorithm generates the code in the reverse bit order, so we need to reverse them 271 | // to get the correct codes. 
272 | code_table[n] = reverse_bits(next_code[length], length as u8); 273 | // We use wrapping here as we would otherwise overflow on the last code 274 | // This should be okay as we exit the loop after this so the value is ignored 275 | next_code[length] = next_code[length].wrapping_add(1); 276 | } 277 | } 278 | } 279 | 280 | /// A structure containing the tables of Huffman codes for lengths, literals and distances 281 | pub struct HuffmanTable { 282 | // Literal, end of block and length codes 283 | codes: [u16; 288], 284 | code_lengths: [u8; 288], 285 | // Distance codes 286 | distance_codes: [u16; 32], 287 | distance_code_lengths: [u8; 32], 288 | } 289 | 290 | impl HuffmanTable { 291 | pub const fn empty() -> HuffmanTable { 292 | HuffmanTable { 293 | codes: [0; 288], 294 | code_lengths: [0; 288], 295 | distance_codes: [0; 32], 296 | distance_code_lengths: [0; 32], 297 | } 298 | } 299 | 300 | #[cfg(test)] 301 | pub fn from_length_tables( 302 | literals_and_lengths: &[u8; 288], 303 | distances: &[u8; 32], 304 | ) -> HuffmanTable { 305 | let mut table = HuffmanTable { 306 | codes: [0; 288], 307 | code_lengths: *literals_and_lengths, 308 | distance_codes: [0; 32], 309 | distance_code_lengths: *distances, 310 | }; 311 | 312 | table.update_from_lengths(); 313 | table 314 | } 315 | 316 | /// Get references to the lengths of the current Huffman codes. 317 | #[inline] 318 | pub const fn get_lengths(&self) -> (&[u8; 288], &[u8; 32]) { 319 | (&self.code_lengths, &self.distance_code_lengths) 320 | } 321 | 322 | /// Get mutable references to the lengths of the current Huffman codes. 323 | /// 324 | /// Used for updating the lengths in place. 325 | #[inline] 326 | pub fn get_lengths_mut(&mut self) -> (&mut [u8; 288], &mut [u8; 32]) { 327 | (&mut self.code_lengths, &mut self.distance_code_lengths) 328 | } 329 | 330 | /// Update the Huffman codes using the existing length values in the Huffman table. 
331 | pub fn update_from_lengths(&mut self) { 332 | create_codes_in_place(self.codes.as_mut(), &self.code_lengths[..]); 333 | create_codes_in_place( 334 | self.distance_codes.as_mut(), 335 | &self.distance_code_lengths[..], 336 | ); 337 | } 338 | 339 | pub fn set_to_fixed(&mut self) { 340 | self.code_lengths = FIXED_CODE_LENGTHS; 341 | self.distance_code_lengths = FIXED_CODE_LENGTHS_DISTANCE; 342 | self.update_from_lengths(); 343 | } 344 | 345 | /// Create a `HuffmanTable` using the fixed tables specified in the DEFLATE format specification. 346 | #[cfg(test)] 347 | pub fn fixed_table() -> HuffmanTable { 348 | // This should be safe to unwrap, if it were to panic the code is wrong, 349 | // tests should catch it. 350 | HuffmanTable::from_length_tables(&FIXED_CODE_LENGTHS, &FIXED_CODE_LENGTHS_DISTANCE) 351 | } 352 | 353 | #[inline] 354 | const fn get_ll_huff(&self, value: usize) -> HuffmanCode { 355 | HuffmanCode::new(self.codes[value], self.code_lengths[value]) 356 | } 357 | 358 | /// Get the Huffman code from the corresponding literal value 359 | #[inline] 360 | pub fn get_literal(&self, value: u8) -> HuffmanCode { 361 | let index = usize::from(value); 362 | HuffmanCode::new(self.codes[index], self.code_lengths[index]) 363 | } 364 | 365 | /// Get the Huffman code for the end of block value 366 | #[inline] 367 | pub const fn get_end_of_block(&self) -> HuffmanCode { 368 | self.get_ll_huff(END_OF_BLOCK_POSITION) 369 | } 370 | 371 | /// Get the Huffman code and extra bits for the specified length 372 | #[inline] 373 | pub fn get_length_huffman(&self, length: StoredLength) -> (HuffmanCode, HuffmanCode) { 374 | let length_data = get_length_code_and_extra_bits(length); 375 | 376 | let length_huffman_code = self.get_ll_huff(length_data.code_number as usize); 377 | 378 | ( 379 | length_huffman_code, 380 | HuffmanCode { 381 | code: length_data.value, 382 | length: length_data.num_bits, 383 | }, 384 | ) 385 | } 386 | 387 | /// Get the Huffman code and extra bits for the 
specified distance 388 | /// 389 | /// Returns None if distance is 0 or above 32768 390 | #[inline] 391 | pub fn get_distance_huffman(&self, distance: u16) -> (HuffmanCode, HuffmanCode) { 392 | //debug_assert!(distance >= MIN_DISTANCE && distance <= MAX_DISTANCE); 393 | 394 | let distance_data = get_distance_code_and_extra_bits(distance); 395 | 396 | let distance_huffman_code = self.distance_codes[distance_data.code_number as usize]; 397 | let distance_huffman_length = 398 | self.distance_code_lengths[distance_data.code_number as usize]; 399 | 400 | ( 401 | HuffmanCode { 402 | code: distance_huffman_code, 403 | length: distance_huffman_length, 404 | }, 405 | HuffmanCode { 406 | code: distance_data.value, 407 | length: distance_data.num_bits, 408 | }, 409 | ) 410 | } 411 | 412 | #[cfg(test)] 413 | pub fn get_length_distance_code(&self, length: u16, distance: u16) -> LengthAndDistanceBits { 414 | assert!(length >= MIN_MATCH && length < MAX_DISTANCE); 415 | let l_codes = self.get_length_huffman(StoredLength::from_actual_length(length)); 416 | let d_codes = self.get_distance_huffman(distance); 417 | LengthAndDistanceBits { 418 | length_code: l_codes.0, 419 | length_extra_bits: l_codes.1, 420 | distance_code: d_codes.0, 421 | distance_extra_bits: d_codes.1, 422 | } 423 | } 424 | } 425 | 426 | #[cfg(test)] 427 | mod test { 428 | use super::*; 429 | use super::{ 430 | build_length_count_table, get_distance_code_and_extra_bits, get_length_code_and_extra_bits, 431 | }; 432 | 433 | use crate::lzvalue::StoredLength; 434 | 435 | fn l(length: u16) -> StoredLength { 436 | StoredLength::from_actual_length(length) 437 | } 438 | 439 | #[test] 440 | fn test_get_length_code() { 441 | let extra_bits = get_length_code_and_extra_bits(l(4)); 442 | assert_eq!(extra_bits.code_number, 258); 443 | assert_eq!(extra_bits.num_bits, 0); 444 | assert_eq!(extra_bits.value, 0); 445 | 446 | let extra_bits = get_length_code_and_extra_bits(l(165)); 447 | assert_eq!(extra_bits.code_number, 282); 448 | 
assert_eq!(extra_bits.num_bits, 5); 449 | assert_eq!(extra_bits.value, 2); 450 | 451 | let extra_bits = get_length_code_and_extra_bits(l(257)); 452 | assert_eq!(extra_bits.code_number, 284); 453 | assert_eq!(extra_bits.num_bits, 5); 454 | assert_eq!(extra_bits.value, 30); 455 | 456 | let extra_bits = get_length_code_and_extra_bits(l(258)); 457 | assert_eq!(extra_bits.code_number, 285); 458 | assert_eq!(extra_bits.num_bits, 0); 459 | } 460 | 461 | #[test] 462 | fn test_distance_code() { 463 | assert_eq!(get_distance_code(1), 0); 464 | // Using 0 for None at the moment 465 | assert_eq!(get_distance_code(0), 0); 466 | assert_eq!(get_distance_code(50000), 0); 467 | assert_eq!(get_distance_code(6146), 25); 468 | assert_eq!(get_distance_code(256), 15); 469 | assert_eq!(get_distance_code(4733), 24); 470 | assert_eq!(get_distance_code(257), 16); 471 | } 472 | 473 | #[test] 474 | fn test_distance_extra_bits() { 475 | let extra = get_distance_code_and_extra_bits(527); 476 | assert_eq!(extra.value, 0b1110); 477 | assert_eq!(extra.code_number, 18); 478 | assert_eq!(extra.num_bits, 8); 479 | let extra = get_distance_code_and_extra_bits(256); 480 | assert_eq!(extra.code_number, 15); 481 | assert_eq!(extra.num_bits, 6); 482 | let extra = get_distance_code_and_extra_bits(4733); 483 | assert_eq!(extra.code_number, 24); 484 | assert_eq!(extra.num_bits, 11); 485 | } 486 | 487 | #[test] 488 | fn test_length_table_fixed() { 489 | let _ = build_length_count_table(&FIXED_CODE_LENGTHS, &mut [0; 16]); 490 | } 491 | 492 | #[test] 493 | #[should_panic] 494 | fn test_length_table_max_length() { 495 | let table = [16u8; 288]; 496 | build_length_count_table(&table, &mut [0; 16]); 497 | } 498 | 499 | #[test] 500 | #[should_panic] 501 | fn test_empty_table() { 502 | let table = []; 503 | build_length_count_table(&table, &mut [0; 16]); 504 | } 505 | 506 | #[test] 507 | fn make_table_fixed() { 508 | let table = HuffmanTable::fixed_table(); 509 | assert_eq!(table.codes[0], 0b00001100); 510 | 
assert_eq!(table.codes[143], 0b11111101); 511 | assert_eq!(table.codes[144], 0b000010011); 512 | assert_eq!(table.codes[255], 0b111111111); 513 | assert_eq!(table.codes[256], 0b0000000); 514 | assert_eq!(table.codes[279], 0b1110100); 515 | assert_eq!(table.codes[280], 0b00000011); 516 | assert_eq!(table.codes[287], 0b11100011); 517 | 518 | assert_eq!(table.distance_codes[0], 0); 519 | assert_eq!(table.distance_codes[5], 20); 520 | 521 | let ld = table.get_length_distance_code(4, 5); 522 | 523 | assert_eq!(ld.length_code.code, 0b00100000); 524 | assert_eq!(ld.distance_code.code, 0b00100); 525 | assert_eq!(ld.distance_extra_bits.length, 1); 526 | assert_eq!(ld.distance_extra_bits.code, 0); 527 | } 528 | 529 | #[test] 530 | fn extra_bits_distance() { 531 | use std::mem::size_of; 532 | for i in 0..NUM_DISTANCE_CODES { 533 | assert_eq!( 534 | num_extra_bits_for_distance_code(i as u8), 535 | DISTANCE_EXTRA_BITS[i] 536 | ); 537 | } 538 | println!("Size of huffmanCode struct: {}", size_of::()); 539 | } 540 | } 541 | -------------------------------------------------------------------------------- /src/input_buffer.rs: -------------------------------------------------------------------------------- 1 | use std::cmp; 2 | 3 | use crate::chained_hash_table::WINDOW_SIZE; 4 | 5 | const MAX_MATCH: usize = crate::huffman_table::MAX_MATCH as usize; 6 | 7 | /// The maximum size of the buffer. 8 | pub const BUFFER_SIZE: usize = (WINDOW_SIZE * 2) + MAX_MATCH; 9 | 10 | pub struct InputBuffer { 11 | buffer: Vec, 12 | } 13 | 14 | impl InputBuffer { 15 | #[cfg(test)] 16 | pub fn new<'a>(data: &'a [u8]) -> (InputBuffer, Option<&[u8]>) { 17 | let mut b = InputBuffer::empty(); 18 | let rem = b.add_data(data); 19 | (b, rem) 20 | } 21 | 22 | pub fn empty() -> InputBuffer { 23 | InputBuffer { 24 | buffer: Vec::with_capacity(BUFFER_SIZE), 25 | } 26 | } 27 | 28 | /// Add data to the buffer. 29 | /// 30 | /// Returns a slice of the data that was not added (including the lookahead if any). 
31 | pub fn add_data<'a>(&mut self, data: &'a [u8]) -> Option<&'a [u8]> { 32 | debug_assert!(self.current_end() <= BUFFER_SIZE); 33 | if self.current_end() + data.len() > BUFFER_SIZE { 34 | // Add data and return how much was left. 35 | let consumed = { 36 | let space_left = BUFFER_SIZE - self.buffer.len(); 37 | self.buffer.extend_from_slice(&data[..space_left]); 38 | space_left 39 | }; 40 | Some(&data[consumed..]) 41 | } else { 42 | // There's space for all of the data. 43 | self.buffer.extend_from_slice(data); 44 | None 45 | } 46 | } 47 | 48 | /// Get the current amount of data in the buffer. 49 | pub fn current_end(&self) -> usize { 50 | self.buffer.len() 51 | } 52 | 53 | /// Slide the input window and add new data. 54 | /// 55 | /// Returns a slice containing the data that did not fit, or `None` if all data was consumed. 56 | pub fn slide<'a>(&mut self, data: &'a [u8]) -> Option<&'a [u8]> { 57 | // This should only be used when the buffer is full 58 | assert!(self.buffer.len() > WINDOW_SIZE * 2); 59 | 60 | // Do this in a closure to end the borrow of buffer. 61 | let (final_len, upper_len, end) = { 62 | // Split into lower window and upper window + lookahead 63 | let (lower, upper) = self.buffer.split_at_mut(WINDOW_SIZE); 64 | // Copy the upper window to the lower window 65 | lower.copy_from_slice(&upper[..WINDOW_SIZE]); 66 | let lookahead_len = { 67 | // Copy the lookahead to the start of the upper window 68 | let (upper_2, lookahead) = upper.split_at_mut(WINDOW_SIZE); 69 | let lookahead_len = lookahead.len(); 70 | debug_assert!(lookahead_len <= MAX_MATCH); 71 | upper_2[..lookahead_len].copy_from_slice(lookahead); 72 | lookahead_len 73 | }; 74 | 75 | // Length of the upper window minus the lookahead bytes 76 | let upper_len = upper.len() - lookahead_len; 77 | let end = cmp::min(data.len(), upper_len); 78 | upper[lookahead_len..lookahead_len + end].copy_from_slice(&data[..end]); 79 | // Remove unused data if any.
80 | (lower.len() + lookahead_len + end, upper_len, end) 81 | }; 82 | // Remove unused space. 83 | self.buffer.truncate(final_len); 84 | 85 | if data.len() > upper_len { 86 | // Return a slice of the data that was not added 87 | Some(&data[end..]) 88 | } else { 89 | None 90 | } 91 | } 92 | 93 | /// Get a mutable slice of the used part of the buffer. 94 | pub fn get_buffer(&mut self) -> &mut [u8] { 95 | &mut self.buffer 96 | } 97 | } 98 | 99 | #[cfg(test)] 100 | mod test { 101 | use super::MAX_MATCH; 102 | use super::*; 103 | use crate::chained_hash_table::WINDOW_SIZE; 104 | #[test] 105 | pub fn buffer_add_full() { 106 | let data = [10u8; BUFFER_SIZE + 10]; 107 | let (mut buf, extra) = InputBuffer::new(&data[..]); 108 | assert!(extra.unwrap() == &[10; 10]); 109 | let to_add = [2, 5, 3]; 110 | let not_added = buf.add_data(&to_add); 111 | assert_eq!(not_added.unwrap(), to_add); 112 | } 113 | 114 | #[test] 115 | pub fn buffer_add_not_full() { 116 | let data = [10u8; BUFFER_SIZE - 5]; 117 | let (mut buf, extra) = InputBuffer::new(&data[..]); 118 | assert_eq!(buf.current_end(), data.len()); 119 | assert_eq!(extra, None); 120 | let to_add = [2, 5, 3]; 121 | { 122 | let not_added = buf.add_data(&to_add); 123 | assert!(not_added.is_none()); 124 | } 125 | let not_added = buf.add_data(&to_add); 126 | assert_eq!(not_added.unwrap()[0], 3); 127 | } 128 | 129 | #[test] 130 | fn slide() { 131 | let data = [10u8; BUFFER_SIZE]; 132 | let (mut buf, extra) = InputBuffer::new(&data[..]); 133 | assert_eq!(extra, None); 134 | let to_add = [5; 5]; 135 | let rem = buf.slide(&to_add); 136 | assert!(rem.is_none()); 137 | { 138 | let slice = buf.get_buffer(); 139 | assert!(slice[..WINDOW_SIZE + MAX_MATCH] == data[WINDOW_SIZE..]); 140 | assert_eq!( 141 | slice[WINDOW_SIZE + MAX_MATCH..WINDOW_SIZE + MAX_MATCH + 5], 142 | to_add 143 | ); 144 | } 145 | assert_eq!(buf.current_end(), WINDOW_SIZE + MAX_MATCH + to_add.len()); 146 | } 147 | } 148 | 
--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
//! An implementation of an encoder using the [DEFLATE](http://www.gzip.org/zlib/rfc-deflate.html)
//! compression algorithm in pure Rust.
//!
//! This library provides functions to compress data using the DEFLATE algorithm,
//! optionally wrapped using the [zlib](https://tools.ietf.org/html/rfc1950) or
//! [gzip](http://www.gzip.org/zlib/rfc-gzip.html) formats.
//! The current implementation is still a bit lacking speed-wise compared to C-libraries
//! like zlib and miniz.
//!
//! The deflate algorithm is an older compression algorithm that is still widely used today,
//! by e.g. html headers, the `.png` image format, the Unix `gzip` program and commonly in `.zip`
//! files. The `zlib` and `gzip` formats are wrappers around DEFLATE-compressed data, containing
//! some extra metadata and a checksum to validate the integrity of the raw data.
//!
//! The deflate algorithm does not perform as well as newer algorithms used in file formats such as
//! `.7z`, `.rar`, `.xz` and `.bz2`, and is thus not the ideal choice for applications where
//! the `DEFLATE` format (with or without wrappers) is not required.
//!
//! Support for the gzip wrapper (the wrapper that is used in `.gz` files) is disabled by default
//! but can be enabled with the `gzip` feature.
//!
//! As this library is still in development, the compression output may change slightly
//! between versions.
//!
//!
//! # Examples:
//! ## Simple compression function:
//! ``` rust
//! use deflate::deflate_bytes;
//!
//! let data = b"Some data";
//! let compressed = deflate_bytes(data);
//! # let _ = compressed;
//! ```
//!
//! ## Using a writer:
//! ``` rust
//! use std::io::Write;
//!
//! use deflate::Compression;
//! use deflate::write::ZlibEncoder;
//!
//! let data = b"This is some test data";
//! let mut encoder = ZlibEncoder::new(Vec::new(), Compression::Default);
//! encoder.write_all(data).expect("Write error!");
//! let compressed_data = encoder.finish().expect("Failed to finish compression!");
//! # let _ = compressed_data;
//! ```

#![forbid(unsafe_code)]
#![cfg_attr(all(feature = "benchmarks", test), feature(test))]

#[cfg(all(test, feature = "benchmarks"))]
extern crate test as test_std;

#[cfg(test)]
extern crate miniz_oxide;

extern crate adler32;
#[cfg(feature = "gzip")]
extern crate gzip_header;

mod bit_reverse;
mod bitstream;
mod chained_hash_table;
mod checksum;
mod compress;
mod compression_options;
mod deflate_state;
mod encoder_state;
mod huffman_lengths;
mod huffman_table;
mod input_buffer;
mod length_encode;
mod lz77;
mod lzvalue;
mod matching;
mod output_writer;
mod rle;
mod stored_block;
#[cfg(test)]
mod test_utils;
mod writer;
mod zlib;

use std::io;
use std::io::Write;

#[cfg(feature = "gzip")]
use gzip_header::Crc;
#[cfg(feature = "gzip")]
use gzip_header::GzBuilder;

use crate::checksum::RollingChecksum;
use crate::deflate_state::DeflateState;

use crate::compress::Flush;
pub use compression_options::{Compression, CompressionOptions, SpecialOptions};
pub use lz77::MatchingType;

use crate::writer::compress_until_done;

/// Encoders implementing a `Write` interface.
pub mod write {
    #[cfg(feature = "gzip")]
    pub use crate::writer::gzip::GzEncoder;
    pub use crate::writer::{DeflateEncoder, ZlibEncoder};
}

/// Compress `input` into `writer`, feeding every input byte through `checksum` first.
/// This is the common backend for the one-shot `deflate_bytes*` helpers.
// NOTE(review): the generic parameter list was stripped by the dump; reconstructed as
// `<RC: RollingChecksum, W: Write>` — confirm against the repository.
fn compress_data_dynamic<RC: RollingChecksum, W: Write>(
    input: &[u8],
    writer: &mut W,
    mut checksum: RC,
    compression_options: CompressionOptions,
) -> io::Result<()> {
    checksum.update_from_slice(input);
    // We use a box here to avoid putting the buffers on the stack
    // It's done here rather than in the structs themselves for now to
    // keep the data close in memory.
    let mut deflate_state = Box::new(DeflateState::new(compression_options, writer));
    compress_until_done(input, &mut deflate_state, Flush::Finish)
}

/// Compress the given slice of bytes with DEFLATE compression.
///
/// Returns a `Vec<u8>` of the compressed data.
///
/// # Examples
///
/// ```
/// use deflate::{deflate_bytes_conf, Compression};
///
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_conf(data, Compression::Best);
/// # let _ = compressed_data;
/// ```
pub fn deflate_bytes_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> {
    // A third of the input length is a rough guess for the output size.
    let mut writer = Vec::with_capacity(input.len() / 3);
    compress_data_dynamic(
        input,
        &mut writer,
        checksum::NoChecksum::new(),
        options.into(),
    )
    .expect("Write error!");
    writer
}

/// Compress the given slice of bytes with DEFLATE compression using the default compression
/// level.
///
/// Returns a `Vec<u8>` of the compressed data.
///
/// # Examples
///
/// ```
/// use deflate::deflate_bytes;
///
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes(data);
/// # let _ = compressed_data;
/// ```
pub fn deflate_bytes(input: &[u8]) -> Vec<u8> {
    deflate_bytes_conf(input, Compression::Default)
}

/// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer.
///
/// Returns a `Vec<u8>` of the compressed data.
///
/// Zlib dictionaries are not yet supported.
///
/// # Examples
///
/// ```
/// use deflate::{deflate_bytes_zlib_conf, Compression};
///
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_zlib_conf(data, Compression::Best);
/// # let _ = compressed_data;
/// ```
pub fn deflate_bytes_zlib_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> {
    let mut writer = Vec::with_capacity(input.len() / 3);
    // Write header
    zlib::write_zlib_header(&mut writer, zlib::CompressionLevel::Default)
        .expect("Write error when writing zlib header!");

    // The zlib wrapper requires an Adler-32 checksum of the uncompressed data.
    let mut checksum = checksum::Adler32Checksum::new();
    compress_data_dynamic(input, &mut writer, &mut checksum, options.into())
        .expect("Write error when writing compressed data!");

    let hash = checksum.current_hash();

    // The Adler-32 trailer is stored big-endian per the zlib spec.
    writer
        .write_all(&hash.to_be_bytes())
        .expect("Write error when writing checksum!");
    writer
}

/// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer,
/// using the default compression level.
///
/// Returns a `Vec<u8>` of the compressed data.
///
/// Zlib dictionaries are not yet supported.
///
/// # Examples
///
/// ```
/// use deflate::deflate_bytes_zlib;
///
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_zlib(data);
/// # let _ = compressed_data;
/// ```
pub fn deflate_bytes_zlib(input: &[u8]) -> Vec<u8> {
    deflate_bytes_zlib_conf(input, Compression::Default)
}

/// Compress the given slice of bytes with DEFLATE compression, including a gzip header and trailer
/// using the given gzip header and compression options.
///
/// Returns a `Vec<u8>` of the compressed data.
///
///
/// # Examples
///
/// ```
/// extern crate gzip_header;
/// extern crate deflate;
///
/// # fn main() {
/// use deflate::{deflate_bytes_gzip_conf, Compression};
/// use gzip_header::GzBuilder;
///
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_gzip_conf(data, Compression::Best, GzBuilder::new());
/// # let _ = compressed_data;
/// # }
/// ```
#[cfg(feature = "gzip")]
pub fn deflate_bytes_gzip_conf<O: Into<CompressionOptions>>(
    input: &[u8],
    options: O,
    gzip_header: GzBuilder,
) -> Vec<u8> {
    let mut writer = Vec::with_capacity(input.len() / 3);

    // Write header
    writer
        .write_all(&gzip_header.into_header())
        .expect("Write error when writing header!");
    // gzip uses CRC32 (computed separately below), so no rolling checksum is needed here.
    let mut checksum = checksum::NoChecksum::new();
    compress_data_dynamic(input, &mut writer, &mut checksum, options.into())
        .expect("Write error when writing compressed data!");

    let mut crc = Crc::new();
    crc.update(input);

    // gzip trailer: CRC32 followed by the input length (mod 2^32), both little-endian.
    writer
        .write_all(&crc.sum().to_le_bytes())
        .expect("Write error when writing checksum!");
    writer
        .write_all(&crc.amt_as_u32().to_le_bytes())
        .expect("Write error when writing amt!");
    writer
}

/// Compress the given slice of bytes with DEFLATE compression, including a gzip header and trailer,
/// using the default compression level, and a gzip header with default values.
///
/// Returns a `Vec<u8>` of the compressed data.
///
///
/// # Examples
///
/// ```
/// use deflate::deflate_bytes_gzip;
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_gzip(data);
/// # let _ = compressed_data;
/// ```
#[cfg(feature = "gzip")]
pub fn deflate_bytes_gzip(input: &[u8]) -> Vec<u8> {
    deflate_bytes_gzip_conf(input, Compression::Default, GzBuilder::new())
}

#[cfg(test)]
mod test {
    use super::*;
    use std::io::Write;

    #[cfg(feature = "gzip")]
    use test_utils::decompress_gzip;
    use test_utils::{decompress_to_end, decompress_zlib, get_test_data};

    type CO = CompressionOptions;

    /// Write data to the writer in chunks of chunk_size.
fn chunked_write<W: Write>(mut writer: W, data: &[u8], chunk_size: usize) {
        for chunk in data.chunks(chunk_size) {
            writer.write_all(&chunk).unwrap();
        }
    }

    /// Round-trip a short in-memory string through raw DEFLATE.
    #[test]
    fn dynamic_string_mem() {
        let test_data = String::from(" GNU GENERAL PUBLIC LICENSE").into_bytes();
        let compressed = deflate_bytes(&test_data);

        assert!(compressed.len() < test_data.len());

        let result = decompress_to_end(&compressed);
        assert_eq!(test_data, result);
    }

    /// Round-trip the full test file, printing the first point of divergence on failure.
    #[test]
    fn dynamic_string_file() {
        let input = get_test_data();
        let compressed = deflate_bytes(&input);

        let result = decompress_to_end(&compressed);
        for (n, (&a, &b)) in input.iter().zip(result.iter()).enumerate() {
            if a != b {
                println!("First difference at {}, input: {}, output: {}", n, a, b);
                println!(
                    "input: {:?}, output: {:?}",
                    &input[n - 3..n + 3],
                    &result[n - 3..n + 3]
                );
                break;
            }
        }
        // Not using assert_eq here deliberately to avoid massive amounts of output spam
        assert!(input == result);
        // Check that we actually managed to compress the input
        assert!(compressed.len() < input.len());
    }

    /// Round-trip the test file using RLE-only compression.
    #[test]
    fn file_rle() {
        let input = get_test_data();
        let compressed = deflate_bytes_conf(&input, CO::rle());

        let result = decompress_to_end(&compressed);
        assert!(input == result);
    }

    /// Round-trip the test file through the zlib wrapper.
    #[test]
    fn file_zlib() {
        let test_data = get_test_data();

        let compressed = deflate_bytes_zlib(&test_data);
        // {
        //     use std::fs::File;
        //     use std::io::Write;
        //     let mut f = File::create("out.zlib").unwrap();
        //     f.write_all(&compressed).unwrap();
        // }

        println!("file_zlib compressed(default) length: {}", compressed.len());

        let result = decompress_zlib(&compressed);

        assert!(&test_data == &result);
assert!(compressed.len() < test_data.len());
    }

    /// Round-trip a tiny input through the zlib wrapper.
    #[test]
    fn zlib_short() {
        let test_data = [10, 10, 10, 10, 10, 55];
        roundtrip_zlib(&test_data, CO::default());
    }

    /// Input slightly larger than one window, to exercise final-block handling.
    #[test]
    fn zlib_last_block() {
        let mut test_data = vec![22; 32768];
        test_data.extend(&[5, 2, 55, 11, 12]);
        roundtrip_zlib(&test_data, CO::default());
    }

    #[test]
    fn deflate_short() {
        let test_data = [10, 10, 10, 10, 10, 55];
        let compressed = deflate_bytes(&test_data);

        let result = decompress_to_end(&compressed);
        assert_eq!(&test_data, result.as_slice());
        // If block type and compression is selected correctly, this should only take 5 bytes.
        assert_eq!(compressed.len(), 5);
    }

    /// Check that gzip output round-trips and preserves header fields (the comment).
    #[cfg(feature = "gzip")]
    #[test]
    fn gzip() {
        let data = get_test_data();
        let comment = b"Test";
        let compressed = deflate_bytes_gzip_conf(
            &data,
            Compression::Default,
            GzBuilder::new().comment(&comment[..]),
        );
        let (dec, decompressed) = decompress_gzip(&compressed);
        assert_eq!(dec.comment().unwrap(), comment);
        assert!(data == decompressed);
    }

    /// Compress via the writer in `chunk_size` pieces and check the output is identical
    /// to the one-shot function as well as round-tripping correctly.
    fn chunk_test(chunk_size: usize, level: CompressionOptions) {
        let mut compressed = Vec::with_capacity(32000);
        let data = get_test_data();
        {
            let mut compressor = write::ZlibEncoder::new(&mut compressed, level);
            chunked_write(&mut compressor, &data, chunk_size);
            compressor.finish().unwrap();
        }
        let compressed2 = deflate_bytes_zlib_conf(&data, level);
        let res = decompress_zlib(&compressed);
        assert!(res == data);
        assert_eq!(compressed.len(), compressed2.len());
        assert!(compressed == compressed2);
    }

    /// Run `chunk_test` across a spread of chunk sizes around the buffer/window boundaries.
    fn writer_chunks_level(level: CompressionOptions) {
        use input_buffer::BUFFER_SIZE;
        let ct = |n| chunk_test(n, level);
        ct(1);
        ct(50);
        ct(400);
ct(32768);
        ct(BUFFER_SIZE);
        ct(50000);
        ct((32768 * 2) + 258);
    }

    #[ignore]
    #[test]
    /// Test the writer by inputting data in one chunk at the time.
    fn zlib_writer_chunks() {
        writer_chunks_level(CompressionOptions::default());
        writer_chunks_level(CompressionOptions::fast());
        writer_chunks_level(CompressionOptions::rle());
    }

    /// Check that the frequency values don't overflow.
    #[test]
    fn frequency_overflow() {
        let _ = deflate_bytes_conf(
            &vec![5; 100000],
            compression_options::CompressionOptions::default(),
        );
    }

    /// Compress and decompress through the zlib wrapper, asserting the result matches.
    /// Uses assert_eq only for small inputs to keep failure output readable.
    fn roundtrip_zlib(data: &[u8], level: CompressionOptions) {
        let compressed = deflate_bytes_zlib_conf(data, level);
        let res = decompress_zlib(&compressed);
        if data.len() <= 32 {
            assert_eq!(res, data, "Failed with level: {:?}", level);
        } else {
            assert!(res == data, "Failed with level: {:?}", level);
        }
    }

    fn check_zero(level: CompressionOptions) {
        roundtrip_zlib(&[], level);
    }

    /// Compress with an empty slice.
#[test]
    fn empty_input() {
        check_zero(CompressionOptions::default());
        check_zero(CompressionOptions::fast());
        check_zero(CompressionOptions::rle());
    }

    /// Round-trip minimal inputs (one and a few literals) at every compression mode.
    #[test]
    fn one_and_two_values() {
        let one = &[1][..];
        roundtrip_zlib(one, CO::rle());
        roundtrip_zlib(one, CO::fast());
        roundtrip_zlib(one, CO::default());
        let two = &[5, 6, 7, 8][..];
        roundtrip_zlib(two, CO::rle());
        roundtrip_zlib(two, CO::fast());
        roundtrip_zlib(two, CO::default());
    }
}
--------------------------------------------------------------------------------
/src/lzvalue.rs:
--------------------------------------------------------------------------------
#[cfg(test)]
use crate::huffman_table::MAX_MATCH;
use crate::huffman_table::{MAX_DISTANCE, MIN_MATCH};

/// A match length stored in its biased on-the-wire form (actual length minus MIN_MATCH),
/// which always fits in a byte.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub struct StoredLength {
    length: u8,
}

impl StoredLength {
    #[cfg(test)]
    pub fn from_actual_length(length: u16) -> StoredLength {
        assert!(length <= MAX_MATCH && length >= MIN_MATCH);
        StoredLength {
            length: (length - MIN_MATCH) as u8,
        }
    }

    pub const fn new(stored_length: u8) -> StoredLength {
        StoredLength {
            length: stored_length,
        }
    }

    pub const fn stored_length(&self) -> u8 {
        self.length
    }

    #[cfg(test)]
    pub fn actual_length(&self) -> u16 {
        u16::from(self.length) + MIN_MATCH
    }
}

#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub enum LZType {
    Literal(u8),
    StoredLengthDistance(StoredLength, u16),
}

/// Compact encoding of an LZ77 value: a literal byte or a length/distance pair.
/// `distance == 0` marks a literal (real distances are always >= 1).
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub struct LZValue {
    litlen: u8,
    distance: u16,
}

impl LZValue {
    #[inline]
    pub const fn literal(value: u8) -> LZValue {
        LZValue {
            litlen: value,
            distance: 0,
        }
    }

    /// Create length-distance pair.
#[inline]
    pub fn length_distance(length: u16, distance: u16) -> LZValue {
        // TODO: Enforce min/max without too much perf penalty.
        debug_assert!(distance > 0 && distance <= MAX_DISTANCE);
        let stored_length = (length - MIN_MATCH) as u8;
        LZValue {
            litlen: stored_length,
            distance,
        }
    }

    /// Decode back into a literal or a length/distance pair
    /// (distance 0 is the literal marker).
    #[inline]
    pub fn value(&self) -> LZType {
        if self.distance != 0 {
            LZType::StoredLengthDistance(StoredLength::new(self.litlen), self.distance)
        } else {
            LZType::Literal(self.litlen)
        }
    }
}

#[cfg(test)]
pub fn lit(l: u8) -> LZValue {
    LZValue::literal(l)
}

#[cfg(test)]
pub fn ld(l: u16, d: u16) -> LZValue {
    LZValue::length_distance(l, d)
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::huffman_table::{MAX_DISTANCE, MAX_MATCH, MIN_DISTANCE, MIN_MATCH};

    /// Exhaustively round-trip every literal, every length and every distance.
    #[test]
    fn lzvalue() {
        for i in 0..255 as usize + 1 {
            let v = LZValue::literal(i as u8);
            if let LZType::Literal(n) = v.value() {
                assert_eq!(n as usize, i);
            } else {
                panic!();
            }
        }

        for i in MIN_MATCH..MAX_MATCH + 1 {
            let v = LZValue::length_distance(i, 5);
            if let LZType::StoredLengthDistance(l, _) = v.value() {
                assert_eq!(l.actual_length(), i);
            } else {
                panic!();
            }
        }

        for i in MIN_DISTANCE..MAX_DISTANCE + 1 {
            let v = LZValue::length_distance(5, i);

            if let LZType::StoredLengthDistance(_, d) = v.value() {
                assert_eq!(d, i);
            } else {
                panic!("Failed to get distance {}", i);
            }
        }
    }
}
--------------------------------------------------------------------------------
/src/matching.rs:
--------------------------------------------------------------------------------
use std::cmp;

use crate::chained_hash_table::{ChainedHashTable, WINDOW_SIZE};

const MAX_MATCH: usize =
crate::huffman_table::MAX_MATCH as usize;
#[cfg(test)]
const MIN_MATCH: usize = crate::huffman_table::MIN_MATCH as usize;

/// Get the length of the checked match
/// The function returns number of bytes at and including `current_pos` that are the same as the
/// ones at `pos_to_check`
#[inline]
pub fn get_match_length(data: &[u8], current_pos: usize, pos_to_check: usize) -> usize {
    // Unsafe version using unaligned loads for comparison.
    // Faster when benching the matching function alone,
    // but not as significant when running the full thing.
    /*
    type Comp = u64;

    use std::mem::size_of;

    let max = cmp::min(data.len() - current_pos, MAX_MATCH);
    let mut left = max;
    let s = size_of::<Comp>();

    unsafe {
        let mut cur = data.as_ptr().offset(current_pos as isize);
        let mut tc = data.as_ptr().offset(pos_to_check as isize);
        while left >= s &&
            (*(cur as *const Comp) == *(tc as *const Comp)) {
            left -= s;
            cur = cur.offset(s as isize);
            tc = tc.offset(s as isize);
        }
        while left > 0 && *cur == *tc {
            left -= 1;
            cur = cur.offset(1);
            tc = tc.offset(1);
        }
    }

    max - left
    */

    // Slightly faster than naive in single bench.
    // Does not use unaligned loads.
    // let l = cmp::min(MAX_MATCH, data.len() - current_pos);

    // let a = unsafe{&data.get_unchecked(current_pos..current_pos + l)};
    // let b = unsafe{&data.get_unchecked(pos_to_check..)};

    // let mut len = 0;

    // for (l, r) in a
    //     .iter()
    //     .zip(b.iter()) {
    //     if *l == *r {
    //         len += 1;
    //         continue;
    //     } else {
    //         break;
    //     }
    // }
    // len as usize

    // Naive version
    data[current_pos..]
.iter()
        .zip(data[pos_to_check..].iter())
        .take(MAX_MATCH)
        .take_while(|&(&a, &b)| a == b)
        .count()
}

/// Try finding the position and length of the longest match in the input data.
/// # Returns
/// (length, distance from position)
/// If no match is found that was better than `prev_length` or at all, or we are at the start,
/// (0, 0) will be returned.
///
/// # Arguments:
/// `data`: The data to search in.
/// `hash_table`: Hash table to use for searching.
/// `position`: The position in the data to match against.
/// `prev_length`: The length of the previous `longest_match` check to compare against.
/// `max_hash_checks`: The maximum number of matching hash chain positions to check.
pub fn longest_match(
    data: &[u8],
    hash_table: &ChainedHashTable,
    position: usize,
    prev_length: usize,
    max_hash_checks: u16,
) -> (usize, usize) {
    // debug_assert_eq!(position, hash_table.current_head() as usize);

    // If we already have a match at the maximum length,
    // or we can't grow further, we stop here.
    if prev_length >= MAX_MATCH || position + prev_length >= data.len() {
        return (0, 0);
    }

    // Matches may not refer further back than one window (32k).
    let limit = if position > WINDOW_SIZE {
        position - WINDOW_SIZE
    } else {
        0
    };

    // Make sure the length is at least one to simplify the matching code, as
    // otherwise the matching code might underflow.
    let prev_length = cmp::max(prev_length, 1);

    let max_length = cmp::min(data.len() - position, MAX_MATCH);

    // The position in the hash chain we are currently checking.
    let mut current_head = position;

    // The best match length we've found so far, and its distance.
    let mut best_length = prev_length;
    let mut best_distance = 0;

    // The position of the previous value in the hash chain.
let mut prev_head;

    for _ in 0..max_hash_checks {
        prev_head = current_head;
        current_head = hash_table.get_prev(current_head) as usize;
        if current_head >= prev_head || current_head < limit {
            // If the current hash chain value refers to itself, or is referring to
            // a value that's higher (we only move backwards through the chain),
            // we are at the end and can stop.
            break;
        }

        // We only check further if the match length can actually increase
        // Checking if the end byte and the potential next byte matches is generally
        // more likely to give a quick answer rather than checking from the start first, given
        // that the hashes match.
        // If there is no previous match, best_length will be 1 and the two first bytes will
        // be checked instead.
        // Since we've made sure best_length is always at least 1, this shouldn't underflow.
        if data[position + best_length - 1..=position + best_length]
            == data[current_head + best_length - 1..=current_head + best_length]
        {
            // Actually check how many bytes match.
            // At the moment this will check the two bytes we just checked again,
            // though adding code for skipping these bytes may not result in any speed
            // gain due to the added complexity.
            let length = get_match_length(data, position, current_head);
            if length > best_length {
                best_length = length;
                best_distance = position - current_head;
                if length == max_length {
                    // We are at the max length, so there is no point
                    // searching any longer
                    break;
                }
            }
        }
    }

    if best_length > prev_length {
        (best_length, best_distance)
    } else {
        (0, 0)
    }
}

/// Try finding the position and length of the longest match in the input data using fast zlib
/// hash skipping algorithm.
/// # Returns
/// (length, distance from position)
/// If no match is found that was better than `prev_length` or at all, or we are at the start,
/// (0, 0) will be returned.
///
/// # Arguments:
/// `data`: The data to search in.
/// `hash_table`: Hash table to use for searching.
/// `position`: The position in the data to match against.
/// `prev_length`: The length of the previous `longest_match` check to compare against.
/// `max_hash_checks`: The maximum number of matching hash chain positions to check.
#[cfg(test)]
pub fn longest_match_fast(
    data: &[u8],
    hash_table: &ChainedHashTable,
    position: usize,
    prev_length: usize,
    max_hash_checks: u16,
) -> (usize, usize) {
    // debug_assert_eq!(position, hash_table.current_head() as usize);

    // If we already have a match at the maximum length,
    // or we can't grow further, we stop here.
    if prev_length >= MAX_MATCH || position + prev_length >= data.len() {
        return (0, 0);
    }

    // Matches may not refer further back than one window (32k).
    let limit = if position > WINDOW_SIZE {
        position - WINDOW_SIZE
    } else {
        0
    };

    // Make sure the length is at least one to simplify the matching code, as
    // otherwise the matching code might underflow.
    let prev_length = cmp::max(prev_length, 1);

    let max_length = cmp::min(data.len() - position, MAX_MATCH);

    // The position in the hash chain we are currently checking.
    let mut current_head = position;

    // The best match length we've found so far, and its distance.
    let mut best_length = prev_length;
    let mut best_distance = 0;
    // The offset from the start of the match of the hash chain we are traversing.
    let mut offset = 0;

    // The position of the previous value in the hash chain.
let mut prev_head;

    for _ in 0..max_hash_checks {
        prev_head = current_head;
        current_head = hash_table.get_prev(current_head) as usize;
        if current_head >= prev_head || current_head < limit + offset {
            // If the current hash chain value refers to itself, or is referring to
            // a value that's higher (we only move backwards through the chain),
            // we are at the end and can stop.
            break;
        }

        let offset_head = current_head - offset;

        // We only check further if the match length can actually increase
        // Checking if the end byte and the potential next byte matches is generally
        // more likely to give a quick answer rather than checking from the start first, given
        // that the hashes match.
        // If there is no previous match, best_length will be 1 and the two first bytes will
        // be checked instead.
        // Since we've made sure best_length is always at least 1, this shouldn't underflow.
        if data[position + best_length - 1..position + best_length + 1]
            == data[offset_head + best_length - 1..offset_head + best_length + 1]
        {
            // Actually check how many bytes match.
            // At the moment this will check the two bytes we just checked again,
            // though adding code for skipping these bytes may not result in any speed
            // gain due to the added complexity.
            let length = get_match_length(data, position, offset_head);
            if length > best_length {
                best_length = length;
                best_distance = position - offset_head;
                if length == max_length {
                    // We are at the max length, so there is no point
                    // searching any longer
                    break;
                }

                // Find the position in the match where the next hash position is the furthest away.
                // By moving to a different hash chain we can potentially skip a lot of checks,
                // saving time.
// We avoid doing this for matches that extend past the starting position, as
                // those will contain positions that are not in the hash table yet.
                if best_distance > best_length {
                    offset = hash_table.farthest_next(offset_head, length);
                    current_head = offset_head + offset;
                }
            }
        }
    }

    if best_length > prev_length {
        (best_length, best_distance)
    } else {
        (0, 0)
    }
}

// Get the longest match from the current position of the hash table.
#[inline]
#[cfg(test)]
pub fn longest_match_current(data: &[u8], hash_table: &ChainedHashTable) -> (usize, usize) {
    use crate::compression_options::MAX_HASH_CHECKS;
    longest_match(
        data,
        hash_table,
        hash_table.current_head() as usize,
        MIN_MATCH as usize - 1,
        MAX_HASH_CHECKS,
    )
}

#[cfg(test)]
mod test {
    use super::{get_match_length, longest_match, longest_match_fast};
    use crate::chained_hash_table::{filled_hash_table, ChainedHashTable, HASH_BYTES};

    /// Test that match lengths are calculated correctly
    #[test]
    fn match_length() {
        let test_arr = [5u8, 5, 5, 5, 5, 9, 9, 2, 3, 5, 5, 5, 5, 5];
        let l = get_match_length(&test_arr, 9, 0);
        assert_eq!(l, 5);
        let l2 = get_match_length(&test_arr, 9, 7);
        assert_eq!(l2, 0);
        let l3 = get_match_length(&test_arr, 10, 0);
        assert_eq!(l3, 4);
    }

    /// Test that we get the longest of the matches
    #[test]
    fn get_longest_match() {
        let test_data = b"xTest data, Test_data,zTest data";
        let hash_table = filled_hash_table(&test_data[..23 + 1 + HASH_BYTES - 1]);

        let (length, distance) = super::longest_match_current(test_data, &hash_table);

        // We check that we get the longest match, rather than the shorter, but closer one.
assert_eq!(distance, 22);
        assert_eq!(length, 9);
        let test_arr2 = [
            10u8, 10, 10, 10, 10, 10, 10, 10, 2, 3, 5, 10, 10, 10, 10, 10,
        ];
        let hash_table = filled_hash_table(&test_arr2[..HASH_BYTES + 1 + 1 + 2]);
        let (length, distance) = super::longest_match_current(&test_arr2, &hash_table);

        assert_eq!(distance, 1);
        assert_eq!(length, 4);
    }

    /// Make sure we can get a match at index zero
    #[test]
    fn match_index_zero() {
        let test_data = b"AAAAAAA";

        let mut hash_table = ChainedHashTable::from_starting_values(test_data[0], test_data[1]);
        for (n, &b) in test_data[2..5].iter().enumerate() {
            hash_table.add_hash_value(n, b);
        }

        let (match_length, match_dist) = longest_match(test_data, &hash_table, 1, 0, 4096);

        assert_eq!(match_dist, 1);
        assert!(match_length == 6);
    }

    /// Test for fast_zlib algorithm.
    /// Check that it doesn't give worse matches than the default one.
    /// ignored by default as it's slow, and best ran in release mode.
    #[ignore]
    #[test]
    fn fast_match_at_least_equal() {
        use crate::test_utils::get_test_data;
        for start_pos in 10000..50000 {
            const NUM_CHECKS: u16 = 400;
            let data = get_test_data();
            let hash_table = filled_hash_table(&data[..start_pos + 1]);
            let pos = hash_table.current_head() as usize;

            let naive_match = longest_match(&data[..], &hash_table, pos, 0, NUM_CHECKS);
            let fast_match = longest_match_fast(&data[..], &hash_table, pos, 0, NUM_CHECKS);

            if fast_match.0 > naive_match.0 {
                println!("Fast match found better match!");
            }

            assert!(
                fast_match.0 >= naive_match.0,
                "naive match had better length! start_pos: {}, naive: {:?}, fast {:?}",
                start_pos,
                naive_match,
                fast_match
            );
            assert!(
                fast_match.1 >= naive_match.1,
                "naive match had better dist! start_pos: {} naive {:?}, fast {:?}",
                start_pos,
                naive_match,
                fast_match
            );
        }
    }
}

#[cfg(all(test, feature = "benchmarks"))]
mod bench {
    use super::{longest_match, longest_match_fast};
    use chained_hash_table::filled_hash_table;
    use test_std::Bencher;
    use test_utils::get_test_data;

    /// Benchmark the straightforward chained-hash search.
    #[bench]
    fn matching(b: &mut Bencher) {
        const POS: usize = 29000;
        let data = get_test_data();
        let hash_table = filled_hash_table(&data[..POS + 1]);
        let pos = hash_table.current_head() as usize;
        println!(
            "M: {:?}",
            longest_match(&data[..], &hash_table, pos, 0, 4096)
        );
        b.iter(|| longest_match(&data[..], &hash_table, pos, 0, 4096));
    }

    /// Benchmark the zlib-style hash-skipping search on the same input.
    #[bench]
    fn fast_matching(b: &mut Bencher) {
        const POS: usize = 29000;
        let data = get_test_data();
        let hash_table = filled_hash_table(&data[..POS + 1]);
        let pos = hash_table.current_head() as usize;
        println!(
            "M: {:?}",
            longest_match_fast(&data[..], &hash_table, pos, 0, 4096)
        );
        b.iter(|| longest_match_fast(&data[..], &hash_table, pos, 0, 4096));
    }
}
--------------------------------------------------------------------------------
/src/output_writer.rs:
--------------------------------------------------------------------------------
use std::u16;

use crate::huffman_table::{
    get_distance_code, get_length_code, END_OF_BLOCK_POSITION, NUM_DISTANCE_CODES,
    NUM_LITERALS_AND_LENGTHS,
};
use crate::lzvalue::LZValue;

/// The type used for representing how many times a literal, length or distance code has been output
/// to the current buffer.
11 | /// As we are limiting the blocks to be at most 2^16 bytes long, we can represent frequencies using 12 | /// 16-bit values. 13 | pub type FrequencyType = u16; 14 | 15 | /// The maximum number of literals/lengths in the buffer, which in practice also means the maximum 16 | /// number of literals/lengths output before a new block is started. 17 | /// This should not be larger than the maximum value `FrequencyType` can represent to prevent 18 | /// overflowing (which would degrade, or in the worst case break compression). 19 | pub const MAX_BUFFER_LENGTH: usize = 1024 * 31; 20 | 21 | #[derive(Debug, PartialEq)] 22 | pub enum BufferStatus { 23 | NotFull, 24 | Full, 25 | } 26 | 27 | /// Struct that buffers lz77 data and keeps track of the usage of different codes 28 | pub struct DynamicWriter { 29 | buffer: Vec, 30 | // The two last length codes are not actually used, but only participates in code construction 31 | // Therefore, we ignore them to get the correct number of lengths 32 | frequencies: [FrequencyType; NUM_LITERALS_AND_LENGTHS], 33 | distance_frequencies: [FrequencyType; NUM_DISTANCE_CODES], 34 | } 35 | 36 | impl DynamicWriter { 37 | #[inline] 38 | pub fn check_buffer_length(&self) -> BufferStatus { 39 | if self.buffer.len() >= MAX_BUFFER_LENGTH { 40 | BufferStatus::Full 41 | } else { 42 | BufferStatus::NotFull 43 | } 44 | } 45 | 46 | #[inline] 47 | pub fn write_literal(&mut self, literal: u8) -> BufferStatus { 48 | debug_assert!(self.buffer.len() < MAX_BUFFER_LENGTH); 49 | self.buffer.push(LZValue::literal(literal)); 50 | self.frequencies[usize::from(literal)] += 1; 51 | self.check_buffer_length() 52 | } 53 | 54 | #[inline] 55 | pub fn write_length_distance(&mut self, length: u16, distance: u16) -> BufferStatus { 56 | self.buffer.push(LZValue::length_distance(length, distance)); 57 | let l_code_num = get_length_code(length); 58 | // As we limit the buffer to 2^16 values, this should be safe from overflowing. 
59 | self.frequencies[l_code_num] += 1; 60 | 61 | let d_code_num = get_distance_code(distance); 62 | // The compiler seems to be able to evade the bounds check here somehow. 63 | self.distance_frequencies[usize::from(d_code_num)] += 1; 64 | self.check_buffer_length() 65 | } 66 | 67 | pub fn buffer_length(&self) -> usize { 68 | self.buffer.len() 69 | } 70 | 71 | pub fn get_buffer(&self) -> &[LZValue] { 72 | &self.buffer 73 | } 74 | 75 | pub fn new() -> DynamicWriter { 76 | let mut w = DynamicWriter { 77 | buffer: Vec::with_capacity(MAX_BUFFER_LENGTH), 78 | frequencies: [0; NUM_LITERALS_AND_LENGTHS], 79 | distance_frequencies: [0; NUM_DISTANCE_CODES], 80 | }; 81 | // This will always be 1, 82 | // since there will always only be one end of block marker in each block 83 | w.frequencies[END_OF_BLOCK_POSITION] = 1; 84 | w 85 | } 86 | 87 | /// Special output function used with RLE compression 88 | /// that avoids bothering to lookup a distance code. 89 | #[inline] 90 | pub fn write_length_rle(&mut self, length: u16) -> BufferStatus { 91 | self.buffer.push(LZValue::length_distance(length, 1)); 92 | let l_code_num = get_length_code(length); 93 | // As we limit the buffer to 2^16 values, this should be safe from overflowing. 
94 | self.frequencies[l_code_num] += 1; 95 | 96 | self.distance_frequencies[0] += 1; 97 | self.check_buffer_length() 98 | } 99 | 100 | pub fn get_frequencies(&self) -> (&[u16], &[u16]) { 101 | (&self.frequencies, &self.distance_frequencies) 102 | } 103 | 104 | pub fn clear_frequencies(&mut self) { 105 | self.frequencies = [0; NUM_LITERALS_AND_LENGTHS]; 106 | self.distance_frequencies = [0; NUM_DISTANCE_CODES]; 107 | self.frequencies[END_OF_BLOCK_POSITION] = 1; 108 | } 109 | 110 | pub fn clear_data(&mut self) { 111 | self.buffer.clear() 112 | } 113 | 114 | pub fn clear(&mut self) { 115 | self.clear_frequencies(); 116 | self.clear_data(); 117 | } 118 | } 119 | 120 | #[cfg(test)] 121 | mod test { 122 | use super::*; 123 | use crate::huffman_table::{get_distance_code, get_length_code}; 124 | #[test] 125 | /// Ensure that these function won't produce values that would overflow the output_writer 126 | /// tables since we use some unsafe indexing. 127 | fn array_bounds() { 128 | let w = DynamicWriter::new(); 129 | 130 | for i in 0..u16::max_value() { 131 | assert!(get_length_code(i) < w.frequencies.len()); 132 | } 133 | 134 | for i in 0..u16::max_value() { 135 | assert!(get_distance_code(i) < w.distance_frequencies.len() as u8); 136 | } 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /src/rle.rs: -------------------------------------------------------------------------------- 1 | use crate::lz77::{buffer_full, ProcessStatus}; 2 | use crate::output_writer::{BufferStatus, DynamicWriter}; 3 | 4 | use std::cmp; 5 | use std::ops::Range; 6 | 7 | const MIN_MATCH: usize = crate::huffman_table::MIN_MATCH as usize; 8 | const MAX_MATCH: usize = crate::huffman_table::MAX_MATCH as usize; 9 | 10 | /// Simple match function for run-length encoding. 11 | /// 12 | /// Checks how many of the next bytes from the start of the slice `data` matches prev. 
13 | fn get_match_length_rle(data: &[u8], prev: u8) -> usize { 14 | data.iter() 15 | .take(MAX_MATCH) 16 | .take_while(|&&b| b == prev) 17 | .count() 18 | } 19 | 20 | /// L77-Compress data using the RLE(Run-length encoding) strategy 21 | /// 22 | /// This function simply looks for runs of data of at least length 3. 23 | pub fn process_chunk_greedy_rle( 24 | data: &[u8], 25 | iterated_data: &Range, 26 | writer: &mut DynamicWriter, 27 | ) -> (usize, ProcessStatus) { 28 | if data.is_empty() { 29 | return (0, ProcessStatus::Ok); 30 | }; 31 | 32 | let end = cmp::min(data.len(), iterated_data.end); 33 | // Start on at least byte 1. 34 | let start = cmp::max(iterated_data.start, 1); 35 | // The previous byte. 36 | let mut prev = data[start - 1]; 37 | // Iterate through the requested range, but avoid going off the end. 38 | let current_chunk = &data[cmp::min(start, end)..end]; 39 | let mut insert_it = current_chunk.iter().enumerate(); 40 | let mut overlap = 0; 41 | // Make sure to output the first byte 42 | if iterated_data.start == 0 && !data.is_empty() { 43 | write_literal!(writer, data[0], 1); 44 | } 45 | 46 | while let Some((n, &b)) = insert_it.next() { 47 | let position = n + start; 48 | let match_len = if prev == b { 49 | //TODO: Avoid comparing with self here. 50 | // Would use as_slice() but that doesn't work on an enumerated iterator. 
51 | get_match_length_rle(&data[position..], prev) 52 | } else { 53 | 0 54 | }; 55 | if match_len >= MIN_MATCH { 56 | if position + match_len > end { 57 | overlap = position + match_len - end; 58 | }; 59 | let b_status = writer.write_length_rle(match_len as u16); 60 | if b_status == BufferStatus::Full { 61 | return (overlap, buffer_full(position + match_len)); 62 | } 63 | insert_it.nth(match_len - 2); 64 | } else { 65 | write_literal!(writer, b, position + 1); 66 | } 67 | prev = b; 68 | } 69 | 70 | (overlap, ProcessStatus::Ok) 71 | } 72 | 73 | #[cfg(test)] 74 | mod test { 75 | use super::*; 76 | use crate::lzvalue::{ld, lit, LZValue}; 77 | 78 | fn l(c: char) -> LZValue { 79 | lit(c as u8) 80 | } 81 | 82 | #[test] 83 | fn rle_compress() { 84 | let input = b"textaaaaaaaaatext"; 85 | let mut w = DynamicWriter::new(); 86 | let r = 0..input.len(); 87 | let (overlap, _) = process_chunk_greedy_rle(input, &r, &mut w); 88 | let expected = [ 89 | l('t'), 90 | l('e'), 91 | l('x'), 92 | l('t'), 93 | l('a'), 94 | ld(8, 1), 95 | l('t'), 96 | l('e'), 97 | l('x'), 98 | l('t'), 99 | ]; 100 | //println!("expected: {:?}", expected); 101 | //println!("actual: {:?}", w.get_buffer()); 102 | assert!(w.get_buffer() == expected); 103 | assert_eq!(overlap, 0); 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /src/stored_block.rs: -------------------------------------------------------------------------------- 1 | use crate::bitstream::LsbWriter; 2 | use std::io; 3 | use std::io::Write; 4 | use std::u16; 5 | 6 | #[cfg(test)] 7 | const BLOCK_SIZE: u16 = 32000; 8 | 9 | const STORED_FIRST_BYTE: u8 = 0b0000_0000; 10 | pub const STORED_FIRST_BYTE_FINAL: u8 = 0b0000_0001; 11 | pub const MAX_STORED_BLOCK_LENGTH: usize = (u16::MAX as usize) / 2; 12 | 13 | pub fn write_stored_header(writer: &mut LsbWriter, final_block: bool) { 14 | let header = if final_block { 15 | STORED_FIRST_BYTE_FINAL 16 | } else { 17 | STORED_FIRST_BYTE 18 | }; 19 | // Write 
the block header 20 | writer.write_bits(header.into(), 3); 21 | // Flush the writer to make sure we are aligned to the byte boundary. 22 | writer.flush_raw(); 23 | } 24 | 25 | // Compress one stored block (excluding the header) 26 | pub fn compress_block_stored(input: &[u8], writer: &mut W) -> io::Result { 27 | if input.len() > u16::max_value() as usize { 28 | return Err(io::Error::new( 29 | io::ErrorKind::InvalidInput, 30 | "Stored block too long!", 31 | )); 32 | }; 33 | // The header is written before this function. 34 | // The next two bytes indicates the length 35 | writer.write_all(&(input.len() as u16).to_le_bytes())?; 36 | // the next two after the length is the ones complement of the length 37 | writer.write_all(&(!input.len() as u16).to_le_bytes())?; 38 | // After this the data is written directly with no compression 39 | writer.write(input) 40 | } 41 | 42 | #[cfg(test)] 43 | pub fn compress_data_stored(input: &[u8]) -> Vec { 44 | let block_length = BLOCK_SIZE as usize; 45 | 46 | let mut output = Vec::with_capacity(input.len() + 2); 47 | let mut i = input.chunks(block_length).peekable(); 48 | while let Some(chunk) = i.next() { 49 | let last_chunk = i.peek().is_none(); 50 | // First bit tells us if this is the final chunk 51 | // the next two details compression type (none in this case) 52 | let first_byte = if last_chunk { 53 | STORED_FIRST_BYTE_FINAL 54 | } else { 55 | STORED_FIRST_BYTE 56 | }; 57 | output.write(&[first_byte]).unwrap(); 58 | 59 | compress_block_stored(chunk, &mut output).unwrap(); 60 | } 61 | output 62 | } 63 | 64 | #[cfg(test)] 65 | mod test { 66 | use super::*; 67 | use crate::test_utils::decompress_to_end; 68 | 69 | #[test] 70 | fn no_compression_one_chunk() { 71 | let test_data = vec![1u8, 2, 3, 4, 5, 6, 7, 8]; 72 | let compressed = compress_data_stored(&test_data); 73 | let result = decompress_to_end(&compressed); 74 | assert_eq!(test_data, result); 75 | } 76 | 77 | #[test] 78 | fn no_compression_multiple_chunks() { 79 | let 
test_data = vec![32u8; 40000]; 80 | let compressed = compress_data_stored(&test_data); 81 | let result = decompress_to_end(&compressed); 82 | assert_eq!(test_data, result); 83 | } 84 | 85 | #[test] 86 | fn no_compression_string() { 87 | let test_data = String::from( 88 | "This is some text, this is some more text, this is even \ 89 | more text, lots of text here.", 90 | ) 91 | .into_bytes(); 92 | let compressed = compress_data_stored(&test_data); 93 | let result = decompress_to_end(&compressed); 94 | assert_eq!(test_data, result); 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/test_utils.rs: -------------------------------------------------------------------------------- 1 | #![cfg(test)] 2 | 3 | #[cfg(feature = "gzip")] 4 | use gzip_header::GzHeader; 5 | 6 | fn get_test_file_data(name: &str) -> Vec { 7 | use std::fs::File; 8 | use std::io::Read; 9 | let mut input = Vec::new(); 10 | let mut f = File::open(name).unwrap(); 11 | 12 | f.read_to_end(&mut input).unwrap(); 13 | input 14 | } 15 | 16 | pub fn get_test_data() -> Vec { 17 | use std::env; 18 | let path = env::var("TEST_FILE").unwrap_or("tests/pg11.txt".to_string()); 19 | get_test_file_data(&path) 20 | } 21 | 22 | /// Helper function to decompress into a `Vec` 23 | pub fn decompress_to_end(input: &[u8]) -> Vec { 24 | use miniz_oxide::inflate::decompress_to_vec; 25 | 26 | decompress_to_vec(input).expect("Decompression failed!") 27 | } 28 | 29 | #[cfg(feature = "gzip")] 30 | pub fn decompress_gzip(compressed: &[u8]) -> (GzHeader, Vec) { 31 | use gzip_header::{read_gz_header, Crc}; 32 | use std::io::Cursor; 33 | let mut c = Cursor::new(compressed); 34 | let h = read_gz_header(&mut c).expect("Failed to decode gzip header!"); 35 | let pos = c.position(); 36 | let compressed = &c.into_inner()[pos as usize..]; 37 | 38 | let result = miniz_oxide::inflate::decompress_to_vec(compressed).expect("Decompression failed"); 39 | 40 | let s = compressed.len(); 41 | 42 | 
let crc = u32::from_le_bytes([ 43 | compressed[s - 8], 44 | compressed[s - 7], 45 | compressed[s - 6], 46 | compressed[s - 5], 47 | ]); 48 | let len = u32::from_le_bytes([ 49 | compressed[s - 4], 50 | compressed[s - 3], 51 | compressed[s - 2], 52 | compressed[s - 1], 53 | ]); 54 | 55 | let mut comp_crc = Crc::new(); 56 | comp_crc.update(&result); 57 | 58 | assert_eq!( 59 | crc, 60 | comp_crc.sum(), 61 | "Checksum failed File: {}, computed: {}", 62 | crc, 63 | comp_crc.sum() 64 | ); 65 | assert_eq!(len, result.len() as u32, "Length mismatch"); 66 | 67 | (h, result) 68 | } 69 | 70 | pub fn decompress_zlib(compressed: &[u8]) -> Vec { 71 | miniz_oxide::inflate::decompress_to_vec_zlib(&compressed).expect("Decompression failed!") 72 | } 73 | -------------------------------------------------------------------------------- /src/zlib.rs: -------------------------------------------------------------------------------- 1 | //! This module contains functionality for generating a [zlib](https://tools.ietf.org/html/rfc1950) 2 | //! header. 3 | //! 4 | //! The Zlib header contains some metadata (a window size and a compression level), and optionally 5 | //! a block of data serving as an extra dictionary for the compressor/decompressor. 6 | //! The dictionary is not implemented in this library. 7 | //! The data in the header aside from the dictionary doesn't actually have any effect on the 8 | //! decompressed data, it only offers some hints for the decompressor on how the data was 9 | //! compressed. 10 | 11 | use std::io::{Result, Write}; 12 | 13 | // CM = 8 means to use the DEFLATE compression method. 14 | const DEFAULT_CM: u8 = 8; 15 | // CINFO = 7 Indicates a 32k window size. 16 | const DEFAULT_CINFO: u8 = 7 << 4; 17 | const DEFAULT_CMF: u8 = DEFAULT_CM | DEFAULT_CINFO; 18 | 19 | // No dict by default. 20 | #[cfg(test)] 21 | const DEFAULT_FDICT: u8 = 0; 22 | // FLEVEL = 0 means fastest compression algorithm. 
23 | const _DEFAULT_FLEVEL: u8 = 0 << 7; 24 | 25 | // The 16-bit value consisting of CMF and FLG must be divisible by this to be valid. 26 | const FCHECK_DIVISOR: u8 = 31; 27 | 28 | #[allow(dead_code)] 29 | #[repr(u8)] 30 | pub enum CompressionLevel { 31 | Fastest = 0 << 6, 32 | Fast = 1 << 6, 33 | Default = 2 << 6, 34 | Maximum = 3 << 6, 35 | } 36 | 37 | /// Generate FCHECK from CMF and FLG (without FCKECH )so that they are correct according to the 38 | /// specification, i.e (CMF*256 + FCHK) % 31 = 0. 39 | /// Returns flg with the FCHKECK bits added (any existing FCHECK bits are ignored). 40 | fn add_fcheck(cmf: u8, flg: u8) -> u8 { 41 | let rem = ((cmf as usize * 256) + flg as usize) % FCHECK_DIVISOR as usize; 42 | 43 | // Clear existing FCHECK if any 44 | let flg = flg & 0b11100000; 45 | 46 | // Casting is safe as rem can't overflow since it is a value mod 31 47 | // We can simply add the value to flg as (31 - rem) will never be above 2^5 48 | flg + (FCHECK_DIVISOR - rem as u8) 49 | } 50 | 51 | /// Write a zlib header with an empty dictionary to the writer using the specified 52 | /// compression level preset. 53 | pub fn write_zlib_header(writer: &mut W, level: CompressionLevel) -> Result<()> { 54 | writer.write_all(&get_zlib_header(level)) 55 | } 56 | 57 | /// Get the zlib header for the `CompressionLevel` level using the default window size and no 58 | /// dictionary. 
59 | pub fn get_zlib_header(level: CompressionLevel) -> [u8; 2] { 60 | let cmf = DEFAULT_CMF; 61 | [cmf, add_fcheck(cmf, level as u8)] 62 | } 63 | 64 | #[cfg(test)] 65 | mod test { 66 | use super::DEFAULT_CMF; 67 | use super::*; 68 | 69 | #[test] 70 | fn test_gen_fcheck() { 71 | let cmf = DEFAULT_CMF; 72 | let flg = super::add_fcheck( 73 | DEFAULT_CMF, 74 | CompressionLevel::Default as u8 | super::DEFAULT_FDICT, 75 | ); 76 | assert_eq!(((usize::from(cmf) * 256) + usize::from(flg)) % 31, 0); 77 | } 78 | 79 | #[test] 80 | fn test_header() { 81 | let header = get_zlib_header(CompressionLevel::Fastest); 82 | assert_eq!( 83 | ((usize::from(header[0]) * 256) + usize::from(header[1])) % 31, 84 | 0 85 | ); 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /tests/afl/default/id_000000,sig_06,src_000831,op_havoc,rep_64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000000,sig_06,src_000831,op_havoc,rep_64 -------------------------------------------------------------------------------- /tests/afl/default/id_000001,sig_06,src_000891,op_havoc,rep_8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000001,sig_06,src_000891,op_havoc,rep_8 -------------------------------------------------------------------------------- /tests/afl/default/id_000002,sig_06,src_000891,op_havoc,rep_8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000002,sig_06,src_000891,op_havoc,rep_8 -------------------------------------------------------------------------------- 
/tests/afl/default/id_000003,sig_06,src_000891,op_havoc,rep_16: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000003,sig_06,src_000891,op_havoc,rep_16 -------------------------------------------------------------------------------- /tests/afl/default/id_000005,sig_06,src_000995,op_flip1,pos_65535: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000005,sig_06,src_000995,op_flip1,pos_65535 -------------------------------------------------------------------------------- /tests/afl/default/id_000006,sig_06,src_000995,op_ext_AO,pos_65534: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000006,sig_06,src_000995,op_ext_AO,pos_65534 -------------------------------------------------------------------------------- /tests/afl/default/id_000007,sig_06,src_001004,op_flip1,pos_15: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000007,sig_06,src_001004,op_flip1,pos_15 -------------------------------------------------------------------------------- /tests/afl/default/id_000008,sig_06,src_001004,op_flip1,pos_15: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000008,sig_06,src_001004,op_flip1,pos_15 -------------------------------------------------------------------------------- /tests/afl/default/id_000009,sig_06,src_001004,op_flip1,pos_238: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000009,sig_06,src_001004,op_flip1,pos_238 -------------------------------------------------------------------------------- /tests/afl/default/id_000010,sig_06,src_001004,op_flip1,pos_554: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000010,sig_06,src_001004,op_flip1,pos_554 -------------------------------------------------------------------------------- /tests/afl/default/id_000011,sig_06,src_001004,op_flip1,pos_1954: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000011,sig_06,src_001004,op_flip1,pos_1954 -------------------------------------------------------------------------------- /tests/afl/default/id_000012,sig_06,src_001004,op_flip1,pos_1955: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000012,sig_06,src_001004,op_flip1,pos_1955 -------------------------------------------------------------------------------- /tests/afl/default/id_000013,sig_06,src_001004,op_flip1,pos_1958: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000013,sig_06,src_001004,op_flip1,pos_1958 -------------------------------------------------------------------------------- /tests/afl/default/id_000014,sig_06,src_001004,op_flip1,pos_1963: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000014,sig_06,src_001004,op_flip1,pos_1963 -------------------------------------------------------------------------------- /tests/afl/default/id_000015,sig_06,src_001004,op_flip1,pos_6662: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000015,sig_06,src_001004,op_flip1,pos_6662 -------------------------------------------------------------------------------- /tests/afl/default/id_000016,sig_06,src_001004,op_flip1,pos_15144: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000016,sig_06,src_001004,op_flip1,pos_15144 -------------------------------------------------------------------------------- /tests/afl/default/id_000017,sig_06,src_001004,op_flip1,pos_15321: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000017,sig_06,src_001004,op_flip1,pos_15321 -------------------------------------------------------------------------------- /tests/afl/default/id_000018,sig_06,src_001004,op_flip1,pos_16334: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000018,sig_06,src_001004,op_flip1,pos_16334 -------------------------------------------------------------------------------- /tests/afl/default/id_000019,sig_06,src_001004,op_flip1,pos_17475: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000019,sig_06,src_001004,op_flip1,pos_17475 -------------------------------------------------------------------------------- /tests/afl/default/id_000020,sig_06,src_001004,op_flip1,pos_18334: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000020,sig_06,src_001004,op_flip1,pos_18334 -------------------------------------------------------------------------------- /tests/afl/default/id_000021,sig_06,src_001004,op_flip1,pos_20365: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000021,sig_06,src_001004,op_flip1,pos_20365 -------------------------------------------------------------------------------- /tests/afl/default/id_000022,sig_06,src_001004,op_flip1,pos_20500: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000022,sig_06,src_001004,op_flip1,pos_20500 -------------------------------------------------------------------------------- /tests/afl/default/id_000023,sig_06,src_001004,op_flip1,pos_20513: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000023,sig_06,src_001004,op_flip1,pos_20513 -------------------------------------------------------------------------------- /tests/afl/default/id_000024,sig_06,src_001004,op_flip1,pos_20518: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000024,sig_06,src_001004,op_flip1,pos_20518 -------------------------------------------------------------------------------- /tests/afl/default/id_000025,sig_06,src_001004,op_flip1,pos_20521: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000025,sig_06,src_001004,op_flip1,pos_20521 -------------------------------------------------------------------------------- /tests/afl/default/id_000026,sig_06,src_001004,op_flip1,pos_20522: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000026,sig_06,src_001004,op_flip1,pos_20522 -------------------------------------------------------------------------------- /tests/afl/default/id_000027,sig_06,src_001004,op_flip1,pos_20525: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000027,sig_06,src_001004,op_flip1,pos_20525 -------------------------------------------------------------------------------- /tests/afl/default/id_000028,sig_06,src_001004,op_flip1,pos_20527: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000028,sig_06,src_001004,op_flip1,pos_20527 -------------------------------------------------------------------------------- /tests/afl/default/id_000029,sig_06,src_001004,op_flip1,pos_20550: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000029,sig_06,src_001004,op_flip1,pos_20550 -------------------------------------------------------------------------------- /tests/afl/default/id_000030,sig_06,src_001004,op_flip1,pos_25139: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000030,sig_06,src_001004,op_flip1,pos_25139 -------------------------------------------------------------------------------- /tests/afl/default/id_000031,sig_06,src_001004,op_flip1,pos_25204: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000031,sig_06,src_001004,op_flip1,pos_25204 -------------------------------------------------------------------------------- /tests/afl/default/id_000032,sig_06,src_001004,op_flip1,pos_32259: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000032,sig_06,src_001004,op_flip1,pos_32259 -------------------------------------------------------------------------------- /tests/afl/default/id_000033,sig_06,src_001004,op_flip1,pos_44443: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000033,sig_06,src_001004,op_flip1,pos_44443 -------------------------------------------------------------------------------- /tests/afl/default/id_000034,sig_06,src_001004,op_flip1,pos_50703: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000034,sig_06,src_001004,op_flip1,pos_50703 -------------------------------------------------------------------------------- /tests/afl/default/id_000035,sig_06,src_001004,op_flip1,pos_57865: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000035,sig_06,src_001004,op_flip1,pos_57865 -------------------------------------------------------------------------------- /tests/afl/default/id_000036,sig_06,src_001004,op_flip1,pos_63260: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000036,sig_06,src_001004,op_flip1,pos_63260 -------------------------------------------------------------------------------- /tests/afl/default/id_000037,sig_06,src_001004,op_flip2,pos_40352: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000037,sig_06,src_001004,op_flip2,pos_40352 -------------------------------------------------------------------------------- /tests/afl/default/id_000038,sig_06,src_001004,op_arith8,pos_15330,val_-31: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000038,sig_06,src_001004,op_arith8,pos_15330,val_-31 -------------------------------------------------------------------------------- /tests/afl/default/id_000039,sig_06,src_001004,op_arith8,pos_32239,val_-30: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000039,sig_06,src_001004,op_arith8,pos_32239,val_-30 -------------------------------------------------------------------------------- /tests/afl/default/id_000040,sig_06,src_001004,op_int32,pos_10338,val_be_+512: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000040,sig_06,src_001004,op_int32,pos_10338,val_be_+512 -------------------------------------------------------------------------------- /tests/afl/default/id_000041,sig_06,src_001004,op_int32,pos_20438,val_be_+1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000041,sig_06,src_001004,op_int32,pos_20438,val_be_+1 -------------------------------------------------------------------------------- /tests/afl/default/id_000042,sig_06,src_001004,op_int32,pos_23295,val_+100: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000042,sig_06,src_001004,op_int32,pos_23295,val_+100 -------------------------------------------------------------------------------- /tests/afl/default/id_000043,sig_06,src_001004,op_int32,pos_25214,val_+16: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000043,sig_06,src_001004,op_int32,pos_25214,val_+16 -------------------------------------------------------------------------------- 
/tests/afl/default/id_000044,sig_06,src_001004,op_ext_AO,pos_55585: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000044,sig_06,src_001004,op_ext_AO,pos_55585 -------------------------------------------------------------------------------- /tests/afl/default/id_000045,sig_06,src_001006,op_havoc,rep_8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/afl/default/id_000045,sig_06,src_001006,op_havoc,rep_8 -------------------------------------------------------------------------------- /tests/issue_18_201911.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/issue_18_201911.bin -------------------------------------------------------------------------------- /tests/issue_44.zlib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/issue_44.zlib -------------------------------------------------------------------------------- /tests/short.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/image-rs/deflate-rs/3262c2526f7efebea7d3da73f5ec547860f82d46/tests/short.bin -------------------------------------------------------------------------------- /tests/test.rs: -------------------------------------------------------------------------------- 1 | extern crate deflate; 2 | extern crate miniz_oxide; 3 | 4 | use deflate::CompressionOptions; 5 | use std::io::{Read, Write}; 6 | 7 | fn get_test_file_data(name: &str) -> Vec { 8 | use std::fs::File; 9 | let mut input = 
Vec::new(); 10 | let mut f = File::open(name).unwrap(); 11 | 12 | f.read_to_end(&mut input).unwrap(); 13 | input 14 | } 15 | 16 | fn get_test_data() -> Vec { 17 | use std::env; 18 | let path = env::var("TEST_FILE").unwrap_or_else(|_| "tests/pg11.txt".to_string()); 19 | get_test_file_data(&path) 20 | } 21 | 22 | fn roundtrip(data: &[u8]) { 23 | roundtrip_conf(data, CompressionOptions::default()) 24 | } 25 | 26 | fn roundtrip_conf(data: &[u8], level: CompressionOptions) { 27 | let compressed = deflate::deflate_bytes_zlib_conf(data, level); 28 | println!("Compressed len: {}, level: {:?}", compressed.len(), level); 29 | let decompressed = 30 | miniz_oxide::inflate::decompress_to_vec_zlib(&compressed).expect("Decompression failed!"); 31 | assert!(decompressed.as_slice() == data); 32 | } 33 | 34 | // A test comparing the compression ratio of the library with flate2 35 | #[test] 36 | fn file_zlib_compare_output() { 37 | let test_data = get_test_data(); 38 | let flate2_compressed = miniz_oxide::deflate::compress_to_vec_zlib(&test_data, 10); 39 | 40 | // { 41 | // use std::fs::File; 42 | // use std::io::Write; 43 | // { 44 | // let mut f = File::create("out.deflate").unwrap(); 45 | // f.write_all(&deflate_compressed).unwrap(); 46 | // } 47 | // { 48 | // let mut f = File::create("out.flate2").unwrap(); 49 | // f.write_all(&flate2_compressed).unwrap(); 50 | // } 51 | // } 52 | 53 | println!("mz_oxide len: {}", flate2_compressed.len(),); 54 | 55 | roundtrip_conf(&test_data, CompressionOptions::high()); 56 | } 57 | 58 | #[test] 59 | fn block_type() { 60 | let test_file = "tests/short.bin"; 61 | let test_data = get_test_file_data(test_file); 62 | let compressed = deflate::deflate_bytes_zlib(&test_data); 63 | assert_eq!(compressed.len(), 30); 64 | 65 | roundtrip(b"test"); 66 | } 67 | 68 | #[test] 69 | fn issue_17() { 70 | // This is window size + 1 bytes long which made the hash table 71 | // slide when there was only the two end-bytes that don't need to be hashed left 72 | // 
and triggered an assertion. 73 | let data = vec![0; 65537]; 74 | 75 | roundtrip(&data); 76 | } 77 | 78 | #[ignore] 79 | #[test] 80 | fn issue_44() { 81 | // Data that results in overlap after non-first window. 82 | // Triggered the debug check due to overlap being added to 83 | // current_block_input_bytes when it should not have. 84 | // Test file is compressed to avoid wasting space, 85 | // and ignored by default due to slowness/memory use. 86 | let compr = get_test_file_data("tests/issue_44.zlib"); 87 | let data = miniz_oxide::inflate::decompress_to_vec_zlib(&compr) 88 | .expect("failed to decompress test file"); 89 | 90 | roundtrip(&data); 91 | } 92 | 93 | #[test] 94 | fn fast() { 95 | let test_data = get_test_data(); 96 | roundtrip_conf(&test_data, CompressionOptions::fast()); 97 | } 98 | 99 | #[test] 100 | fn rle() { 101 | use deflate::{deflate_bytes_conf, CompressionOptions}; 102 | let test_data = get_test_data(); 103 | let compressed = deflate_bytes_conf(&test_data, CompressionOptions::rle()); 104 | let decompressed = 105 | miniz_oxide::inflate::decompress_to_vec(&compressed).expect("Decompression failed!"); 106 | 107 | println!("Input size: {}", test_data.len()); 108 | println!("Rle compressed len: {}", compressed.len()); 109 | 110 | assert!(test_data == decompressed); 111 | } 112 | 113 | #[test] 114 | fn issue_26() { 115 | use deflate::write::ZlibEncoder; 116 | let fp = Vec::new(); 117 | let mut fp = ZlibEncoder::new(fp, CompressionOptions::default()); 118 | 119 | fp.write(&[0]).unwrap(); 120 | fp.flush().unwrap(); 121 | fp.write(&[0]).unwrap(); 122 | fp.write(&[0, 0]).unwrap(); 123 | } 124 | 125 | #[cfg(feature = "gzip")] 126 | #[test] 127 | fn issue_26_gzip() { 128 | use deflate::write::DeflateEncoder; 129 | let fp = Vec::new(); 130 | let mut fp = DeflateEncoder::new(fp, CompressionOptions::default()); 131 | 132 | fp.write(&[0]).unwrap(); 133 | fp.flush().unwrap(); 134 | fp.write(&[0]).unwrap(); 135 | fp.write(&[0, 0]).unwrap(); 136 | } 137 | 138 | 
#[test] 139 | fn issue_18_201911() { 140 | let test_file = "tests/issue_18_201911.bin"; 141 | let test_data = get_test_file_data(test_file); 142 | // This was the failing compression mode. 143 | roundtrip_conf(&test_data, deflate::Compression::Fast.into()); 144 | roundtrip_conf(&test_data, CompressionOptions::default()); 145 | } 146 | 147 | #[test] 148 | fn afl_regressions_default_compression() { 149 | for entry in std::fs::read_dir("tests/afl/default").unwrap() { 150 | let entry = entry.unwrap(); 151 | let test_file = entry.path(); 152 | if test_file.is_file() { 153 | let test_filename = test_file.to_str().unwrap(); 154 | println!("{}", test_filename); 155 | let test_data = get_test_file_data(test_filename); 156 | // This was the failing compression mode. 157 | roundtrip_conf(&test_data, CompressionOptions::default()); 158 | roundtrip_conf(&test_data, deflate::Compression::Fast.into()); 159 | } 160 | } 161 | } 162 | 163 | mod issue_47 { 164 | use std::io::{self, Write}; 165 | 166 | #[test] 167 | fn issue_47() { 168 | let _ = deflate::write::ZlibEncoder::new( 169 | SmallWriter::new(vec![], 2), 170 | deflate::Compression::Fast, 171 | ) 172 | .flush(); 173 | } 174 | 175 | struct SmallWriter { 176 | writer: W, 177 | small: usize, 178 | } 179 | 180 | impl SmallWriter { 181 | fn new(writer: W, buf_len: usize) -> SmallWriter { 182 | SmallWriter { 183 | writer, 184 | small: buf_len, 185 | } 186 | } 187 | } 188 | 189 | impl Write for SmallWriter { 190 | fn write(&mut self, buf: &[u8]) -> io::Result { 191 | // Never write more than `small` bytes at a time. 192 | let small = buf.len().min(self.small); 193 | self.writer.write(&buf[..small]) 194 | } 195 | 196 | fn flush(&mut self) -> io::Result<()> { 197 | Ok(()) 198 | } 199 | } 200 | } 201 | --------------------------------------------------------------------------------